pvr: Add support for VK_ATTACHMENT_LOAD_OP_LOAD.

Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21280>
This commit is contained in:
Karmjit Mahil
2023-01-27 18:25:57 +00:00
committed by Marge Bot
parent c75c58e54c
commit e089166776
5 changed files with 270 additions and 39 deletions

View File

@@ -43,6 +43,7 @@
#include "pvr_limits.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "pvr_winsys.h"
#include "util/bitscan.h"
@@ -51,6 +52,7 @@
#include "util/list.h"
#include "util/macros.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "util/u_pack_color.h"
#include "vk_alloc.h"
#include "vk_command_buffer.h"
@@ -601,6 +603,82 @@ err_csb_finish:
return result;
}
/* Descriptor holding packed texture state image words followed by a sampler
 * descriptor. pvr_setup_texture_state_words() fills `image` through
 * pvr_pack_tex_state() and `sampler` through pvr_csb_pack().
 */
struct pvr_combined_image_sampler_descriptor {
/* | TEXSTATE_IMAGE_WORD0 | TEXSTATE_{STRIDE_,}IMAGE_WORD1 | */
uint64_t image[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
/* Sampler state used together with the image words above. */
union pvr_sampler_descriptor sampler;
};
/* Compile-time check that member `member` of `struct type_tag` occupies
 * exactly `expected_bytes` bytes. The member is only named inside sizeof(),
 * so the NULL pointer is never dereferenced.
 */
#define CHECK_STRUCT_FIELD_SIZE(type_tag, member, expected_bytes)       \
   static_assert(sizeof(((struct type_tag *)NULL)->member) ==           \
                    (expected_bytes),                                   \
                 "Size of '" #member "' in '" #type_tag                 \
                 "' differs from expected")
/* The `image` member must match every size the rest of the code may assume
 * for it. Each check below compares it against one such definition so that a
 * change to any of them fails the build instead of silently mismatching.
 */

/* Array length expressed in 64-bit texture state words. */
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
image,
ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t));
/* Image descriptor size; PVR_IMAGE_DESCRIPTOR_SIZE is scaled by
 * sizeof(uint32_t) here, so it is presumably a dword count.
 */
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
image,
PVR_IMAGE_DESCRIPTOR_SIZE * sizeof(uint32_t));
/* Packed length of TEXSTATE_IMAGE_WORD0 followed by TEXSTATE_IMAGE_WORD1. */
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
image,
(pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
pvr_cmd_length(TEXSTATE_IMAGE_WORD1)) *
sizeof(uint32_t));
/* Same as above for the strided variant of the second word. */
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
image,
(pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
pvr_cmd_length(TEXSTATE_STRIDE_IMAGE_WORD1)) *
sizeof(uint32_t));
/* The helper is only needed for the checks above. */
#undef CHECK_STRUCT_FIELD_SIZE
static VkResult pvr_setup_texture_state_words(
struct pvr_device *device,
struct pvr_combined_image_sampler_descriptor *descriptor,
const struct pvr_image_view *image_view)
{
const struct pvr_image *image = vk_to_pvr_image(image_view->vk.image);
struct pvr_texture_state_info info = {
.format = image_view->vk.format,
.mem_layout = image->memlayout,
.type = image_view->vk.view_type,
.is_cube = image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE ||
image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY,
.tex_state_type = PVR_TEXTURE_STATE_SAMPLE,
.extent = image_view->vk.extent,
.mip_levels = 1,
.sample_count = image_view->vk.image->samples,
.stride = image->physical_extent.width,
.addr = image->dev_addr,
};
const uint8_t *const swizzle = pvr_get_format_swizzle(info.format);
VkResult result;
memcpy(&info.swizzle, swizzle, sizeof(info.swizzle));
/* TODO: Can we use image_view->texture_state instead of generating here? */
result = pvr_pack_tex_state(device, &info, descriptor->image);
if (result != VK_SUCCESS)
return result;
descriptor->sampler = (union pvr_sampler_descriptor){ 0 };
pvr_csb_pack (&descriptor->sampler.data.sampler_word,
TEXSTATE_SAMPLER,
sampler) {
sampler.non_normalized_coords = true;
sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
}
return VK_SUCCESS;
}
static VkResult
pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
const struct pvr_load_op *load_op,
@@ -612,29 +690,108 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
const struct pvr_renderpass_hwsetup_render *hw_render = load_op->hw_render;
const struct pvr_renderpass_colorinit *color_init =
&hw_render->color_init[0];
const struct pvr_render_pass_attachment *attachment =
&pass->attachments[color_init->index];
const VkClearValue *clear_value =
&render_pass_info->clear_values[color_init->index];
uint32_t hw_clear_value[PVR_CLEAR_COLOR_ARRAY_SIZE];
uint32_t attachment_count;
struct pvr_bo *clear_bo;
bool has_depth_clear;
bool has_depth_load;
VkResult result;
pvr_finishme("Add missing load op data support");
/* These are only setup and never used for now. These will need to be
* uploaded into a buffer based on some compiler info.
*/
/* TODO: Remove the above comment once the compiler is hooked up and we're
* setting up + uploading the buffer.
*/
struct pvr_combined_image_sampler_descriptor
texture_states[PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS];
uint32_t texture_count = 0;
uint32_t hw_clear_value[PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS *
PVR_CLEAR_COLOR_ARRAY_SIZE];
uint32_t next_clear_consts = 0;
assert(load_op->is_hw_object);
assert(hw_render->color_init_count == 1);
if (load_op->is_hw_object)
attachment_count = load_op->hw_render->color_init_count;
else
attachment_count = load_op->subpass->color_count;
assert(vk_format_get_blocksize(attachment->vk_format) <=
sizeof(hw_clear_value));
for (uint32_t i = 0; i < attachment_count; i++) {
struct pvr_image_view *image_view;
uint32_t attachment_idx;
/* FIXME: add support for VK_ATTACHMENT_LOAD_OP_LOAD. */
assert(color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR);
if (load_op->is_hw_object)
attachment_idx = load_op->hw_render->color_init[i].index;
else
attachment_idx = load_op->subpass->color_attachments[i];
/* FIXME: do this at the point we store the clear values? */
pvr_get_hw_clear_color(attachment->vk_format,
clear_value->color,
hw_clear_value);
image_view = render_pass_info->attachments[attachment_idx];
assert((load_op->clears_loads_state.rt_load_mask &
load_op->clears_loads_state.rt_clear_mask) == 0);
if (load_op->clears_loads_state.rt_load_mask & BITFIELD_BIT(i)) {
result = pvr_setup_texture_state_words(cmd_buffer->device,
&texture_states[texture_count],
image_view);
if (result != VK_SUCCESS)
return result;
texture_count++;
} else if (load_op->clears_loads_state.rt_clear_mask & BITFIELD_BIT(i)) {
const uint32_t accum_fmt_size =
pvr_get_pbe_accum_format_size_in_bytes(image_view->vk.format);
assert(next_clear_consts +
vk_format_get_blocksize(image_view->vk.format) <=
ARRAY_SIZE(hw_clear_value));
/* FIXME: do this at the point we store the clear values? */
pvr_get_hw_clear_color(image_view->vk.format,
clear_value->color,
&hw_clear_value[next_clear_consts]);
next_clear_consts += DIV_ROUND_UP(accum_fmt_size, sizeof(uint32_t));
}
}
has_depth_load = load_op->clears_loads_state.rt_load_mask != 0;
has_depth_clear = load_op->clears_loads_state.depth_clear_to_reg != -1;
assert(!(has_depth_clear && has_depth_load));
if (has_depth_load) {
const struct pvr_render_pass_attachment *attachment;
const struct pvr_image_view *image_view;
assert(*load_op->subpass->depth_stencil_attachment !=
VK_ATTACHMENT_UNUSED);
assert(!load_op->is_hw_object);
attachment =
&pass->attachments[*load_op->subpass->depth_stencil_attachment];
image_view = render_pass_info->attachments[attachment->index];
result = pvr_setup_texture_state_words(cmd_buffer->device,
&texture_states[texture_count],
image_view);
if (result != VK_SUCCESS)
return result;
texture_count++;
} else if (has_depth_clear) {
const struct pvr_render_pass_attachment *attachment;
VkClearValue clear_value;
assert(*load_op->subpass->depth_stencil_attachment !=
VK_ATTACHMENT_UNUSED);
attachment =
&pass->attachments[*load_op->subpass->depth_stencil_attachment];
clear_value = render_pass_info->clear_values[attachment->index];
assert(next_clear_consts < ARRAY_SIZE(hw_clear_value));
hw_clear_value[next_clear_consts++] = fui(clear_value.depthStencil.depth);
}
result = pvr_cmd_buffer_upload_general(cmd_buffer,
&hw_clear_value[0],

View File

@@ -32,6 +32,7 @@
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_usc_fragment_shader.h"
#include "util/macros.h"
#include "rogue/rogue.h"
#include "vk_alloc.h"
#include "vk_format.h"
@@ -205,6 +206,9 @@ VkResult pvr_pds_unitex_state_program_create_and_upload(
return VK_SUCCESS;
}
/* TODO: pvr_create_subpass_load_op() and pvr_create_render_load_op() are quite
* similar. See if we can dedup them?
*/
static VkResult
pvr_create_subpass_load_op(struct pvr_device *device,
const VkAllocationCallbacks *allocator,
@@ -226,19 +230,43 @@ pvr_create_subpass_load_op(struct pvr_device *device,
if (!load_op)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (hw_subpass->z_replicate != -1 &&
hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_LOAD) {
pvr_finishme("Missing depth 'load' load op");
load_op->load_depth = true;
load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;
if (hw_subpass->z_replicate != -1) {
const int32_t z_replicate = hw_subpass->z_replicate;
switch (hw_subpass->depth_initop) {
case VK_ATTACHMENT_LOAD_OP_LOAD:
assert(z_replicate < PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
load_op->clears_loads_state.rt_load_mask = BITFIELD_BIT(z_replicate);
load_op->clears_loads_state.dest_vk_format[z_replicate] =
VK_FORMAT_D32_SFLOAT;
break;
case VK_ATTACHMENT_LOAD_OP_CLEAR:
load_op->clears_loads_state.depth_clear_to_reg = z_replicate;
break;
default:
break;
}
}
assert(subpass->color_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
for (uint32_t i = 0; i < subpass->color_count; i++) {
pvr_finishme("Missing color 'clear' and 'load' load ops");
const uint32_t attachment_idx = subpass->color_attachments[i];
if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR)
load_op->clear_mask |= 1U << i;
else if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD)
pvr_finishme("Missing 'load' load op");
assert(attachment_idx < pass->attachment_count);
load_op->clears_loads_state.dest_vk_format[i] =
pass->attachments[attachment_idx].vk_format;
if (pass->attachments[attachment_idx].sample_count > 1)
load_op->clears_loads_state.unresolved_msaa_mask = BITFIELD_BIT(i);
if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD)
load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
else if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR)
load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
}
load_op->is_hw_object = false;
@@ -252,7 +280,8 @@ pvr_create_subpass_load_op(struct pvr_device *device,
static VkResult
pvr_create_render_load_op(struct pvr_device *device,
const VkAllocationCallbacks *allocator,
struct pvr_renderpass_hwsetup_render *hw_render,
const struct pvr_render_pass *pass,
const struct pvr_renderpass_hwsetup_render *hw_render,
struct pvr_load_op **const load_op_out)
{
struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc,
@@ -263,13 +292,23 @@ pvr_create_render_load_op(struct pvr_device *device,
if (!load_op)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;
assert(hw_render->color_init_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i];
if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR)
load_op->clear_mask |= 1U << i;
else if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD)
pvr_finishme("Missing 'load' load op");
assert(color_init->index < pass->attachment_count);
load_op->clears_loads_state.dest_vk_format[i] =
pass->attachments[color_init->index].vk_format;
if (pass->attachments[color_init->index].sample_count > 1)
load_op->clears_loads_state.unresolved_msaa_mask = BITFIELD_BIT(i);
if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD)
load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
else if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR)
load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
}
load_op->is_hw_object = true;
@@ -602,9 +641,6 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
assert(!hw_render->load_op);
if (hw_render->color_init_count != 0U) {
/* Add a dummy output register use to the HW render setup if it has no
* output registers in use.
*/
if (!pvr_has_output_register_writes(hw_render)) {
const uint32_t last = hw_render->init_setup.num_render_targets;
struct usc_mrt_resource *mrt_resources;
@@ -638,8 +674,11 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
mrt_resources[last].mrt_desc.valid_mask[3U] = ~0;
}
result =
pvr_create_render_load_op(device, pAllocator, hw_render, &load_op);
result = pvr_create_render_load_op(device,
pAllocator,
pass,
hw_render,
&load_op);
if (result != VK_SUCCESS) {
vk_free2(&device->vk.alloc, pAllocator, load_op);
goto err_load_op_destroy;

View File

@@ -1389,13 +1389,14 @@ struct pvr_render_pass {
uint32_t max_tilebuffer_count;
};
/* Max render targets for the clears loads state in load op.
* To account for resolve attachments, double the color attachments.
*/
#define PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS (PVR_MAX_COLOR_ATTACHMENTS * 2)
struct pvr_load_op {
bool is_hw_object;
uint32_t clear_mask;
bool load_depth;
struct pvr_bo *usc_frag_prog_bo;
uint32_t const_shareds_count;
uint32_t shareds_dest_offset;
@@ -1410,8 +1411,42 @@ struct pvr_load_op {
const struct pvr_renderpass_hwsetup_render *hw_render;
const struct pvr_render_subpass *subpass;
};
/* TODO: We might not need to keep all of this around. Some stuff might just
* be for the compiler to ingest which we can then discard.
*/
struct {
uint16_t rt_clear_mask;
uint16_t rt_load_mask;
uint16_t unresolved_msaa_mask;
/* The format to write to the output regs. */
VkFormat dest_vk_format[PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS];
#define PVR_NO_DEPTH_CLEAR_TO_REG (-1)
/* If >= 0, write a depth clear value to the specified pixel output. */
int32_t depth_clear_to_reg;
} clears_loads_state;
};
/* Compile-time check that the mask member `_field_name` of
 * `struct _struct_type` is wide enough to hold `_nr_bits` bits, i.e. one bit
 * per entry the mask has to track.
 *
 * `_nr_bits` is parenthesized so that an argument containing an operator with
 * lower precedence than `>=` (e.g. `A & B` or `c ? x : y`) is compared as a
 * whole instead of being split by the comparison. The member is only named
 * inside sizeof(), so the NULL pointer is never dereferenced. The factor 8
 * assumes 8-bit bytes.
 */
#define CHECK_MASK_SIZE(_struct_type, _field_name, _nr_bits)               \
   static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) * 8 >= \
                    (_nr_bits),                                            \
                 #_field_name " mask of struct " #_struct_type " too small")
/* Each per-render-target mask in pvr_load_op::clears_loads_state needs at
 * least one bit for every one of the PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS render
 * targets; fail the build if a mask type is too narrow.
 */
CHECK_MASK_SIZE(pvr_load_op,
clears_loads_state.rt_clear_mask,
PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
CHECK_MASK_SIZE(pvr_load_op,
clears_loads_state.rt_load_mask,
PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
CHECK_MASK_SIZE(pvr_load_op,
clears_loads_state.unresolved_msaa_mask,
PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
/* The helper is only needed for the checks above. */
#undef CHECK_MASK_SIZE
uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
const struct pvr_physical_device *pdevice,
uint32_t fs_common_size,

View File

@@ -64,7 +64,7 @@ static enum ROGUE_TEXSTATE_SWIZ pvr_get_hw_swizzle(VkComponentSwizzle comp,
VkResult
pvr_pack_tex_state(struct pvr_device *device,
struct pvr_texture_state_info *info,
const struct pvr_texture_state_info *info,
uint64_t state[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS])
{
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;

View File

@@ -106,7 +106,7 @@ struct pvr_texture_state_info {
VkResult
pvr_pack_tex_state(struct pvr_device *device,
struct pvr_texture_state_info *info,
const struct pvr_texture_state_info *info,
uint64_t state[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS]);
#endif /* PVR_TEX_STATE_H */