pvr: Add support for VK_ATTACHMENT_LOAD_OP_LOAD.
Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21280>
This commit is contained in:
@@ -43,6 +43,7 @@
|
||||
#include "pvr_limits.h"
|
||||
#include "pvr_pds.h"
|
||||
#include "pvr_private.h"
|
||||
#include "pvr_tex_state.h"
|
||||
#include "pvr_types.h"
|
||||
#include "pvr_winsys.h"
|
||||
#include "util/bitscan.h"
|
||||
@@ -51,6 +52,7 @@
|
||||
#include "util/list.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_pack_color.h"
|
||||
#include "vk_alloc.h"
|
||||
#include "vk_command_buffer.h"
|
||||
@@ -601,6 +603,82 @@ err_csb_finish:
|
||||
return result;
|
||||
}
|
||||
|
||||
struct pvr_combined_image_sampler_descriptor {
|
||||
/* | TEXSTATE_IMAGE_WORD0 | TEXSTATE_{STRIDE_,}IMAGE_WORD1 | */
|
||||
uint64_t image[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
|
||||
union pvr_sampler_descriptor sampler;
|
||||
};
|
||||
|
||||
#define CHECK_STRUCT_FIELD_SIZE(_struct_type, _field_name, _size) \
|
||||
static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) == \
|
||||
(_size), \
|
||||
"Size of '" #_field_name "' in '" #_struct_type \
|
||||
"' differs from expected")
|
||||
|
||||
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
|
||||
image,
|
||||
ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t));
|
||||
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
|
||||
image,
|
||||
PVR_IMAGE_DESCRIPTOR_SIZE * sizeof(uint32_t));
|
||||
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
|
||||
image,
|
||||
(pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
|
||||
pvr_cmd_length(TEXSTATE_IMAGE_WORD1)) *
|
||||
sizeof(uint32_t));
|
||||
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
|
||||
image,
|
||||
(pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
|
||||
pvr_cmd_length(TEXSTATE_STRIDE_IMAGE_WORD1)) *
|
||||
sizeof(uint32_t));
|
||||
|
||||
#undef CHECK_STRUCT_FIELD_SIZE
|
||||
|
||||
static VkResult pvr_setup_texture_state_words(
|
||||
struct pvr_device *device,
|
||||
struct pvr_combined_image_sampler_descriptor *descriptor,
|
||||
const struct pvr_image_view *image_view)
|
||||
{
|
||||
const struct pvr_image *image = vk_to_pvr_image(image_view->vk.image);
|
||||
struct pvr_texture_state_info info = {
|
||||
.format = image_view->vk.format,
|
||||
.mem_layout = image->memlayout,
|
||||
.type = image_view->vk.view_type,
|
||||
.is_cube = image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE ||
|
||||
image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY,
|
||||
.tex_state_type = PVR_TEXTURE_STATE_SAMPLE,
|
||||
.extent = image_view->vk.extent,
|
||||
.mip_levels = 1,
|
||||
.sample_count = image_view->vk.image->samples,
|
||||
.stride = image->physical_extent.width,
|
||||
.addr = image->dev_addr,
|
||||
};
|
||||
const uint8_t *const swizzle = pvr_get_format_swizzle(info.format);
|
||||
VkResult result;
|
||||
|
||||
memcpy(&info.swizzle, swizzle, sizeof(info.swizzle));
|
||||
|
||||
/* TODO: Can we use image_view->texture_state instead of generating here? */
|
||||
result = pvr_pack_tex_state(device, &info, descriptor->image);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
descriptor->sampler = (union pvr_sampler_descriptor){ 0 };
|
||||
|
||||
pvr_csb_pack (&descriptor->sampler.data.sampler_word,
|
||||
TEXSTATE_SAMPLER,
|
||||
sampler) {
|
||||
sampler.non_normalized_coords = true;
|
||||
sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
|
||||
sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
|
||||
sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
|
||||
sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
|
||||
sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
|
||||
const struct pvr_load_op *load_op,
|
||||
@@ -612,29 +690,108 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render = load_op->hw_render;
|
||||
const struct pvr_renderpass_colorinit *color_init =
|
||||
&hw_render->color_init[0];
|
||||
const struct pvr_render_pass_attachment *attachment =
|
||||
&pass->attachments[color_init->index];
|
||||
const VkClearValue *clear_value =
|
||||
&render_pass_info->clear_values[color_init->index];
|
||||
uint32_t hw_clear_value[PVR_CLEAR_COLOR_ARRAY_SIZE];
|
||||
uint32_t attachment_count;
|
||||
struct pvr_bo *clear_bo;
|
||||
bool has_depth_clear;
|
||||
bool has_depth_load;
|
||||
VkResult result;
|
||||
|
||||
pvr_finishme("Add missing load op data support");
|
||||
/* These are only setup and never used for now. These will need to be
|
||||
* uploaded into a buffer based on some compiler info.
|
||||
*/
|
||||
/* TODO: Remove the above comment once the compiler is hooked up and we're
|
||||
* setting up + uploading the buffer.
|
||||
*/
|
||||
struct pvr_combined_image_sampler_descriptor
|
||||
texture_states[PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS];
|
||||
uint32_t texture_count = 0;
|
||||
uint32_t hw_clear_value[PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS *
|
||||
PVR_CLEAR_COLOR_ARRAY_SIZE];
|
||||
uint32_t next_clear_consts = 0;
|
||||
|
||||
assert(load_op->is_hw_object);
|
||||
assert(hw_render->color_init_count == 1);
|
||||
if (load_op->is_hw_object)
|
||||
attachment_count = load_op->hw_render->color_init_count;
|
||||
else
|
||||
attachment_count = load_op->subpass->color_count;
|
||||
|
||||
assert(vk_format_get_blocksize(attachment->vk_format) <=
|
||||
sizeof(hw_clear_value));
|
||||
for (uint32_t i = 0; i < attachment_count; i++) {
|
||||
struct pvr_image_view *image_view;
|
||||
uint32_t attachment_idx;
|
||||
|
||||
/* FIXME: add support for VK_ATTACHMENT_LOAD_OP_LOAD. */
|
||||
assert(color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR);
|
||||
if (load_op->is_hw_object)
|
||||
attachment_idx = load_op->hw_render->color_init[i].index;
|
||||
else
|
||||
attachment_idx = load_op->subpass->color_attachments[i];
|
||||
|
||||
/* FIXME: do this at the point we store the clear values? */
|
||||
pvr_get_hw_clear_color(attachment->vk_format,
|
||||
clear_value->color,
|
||||
hw_clear_value);
|
||||
image_view = render_pass_info->attachments[attachment_idx];
|
||||
|
||||
assert((load_op->clears_loads_state.rt_load_mask &
|
||||
load_op->clears_loads_state.rt_clear_mask) == 0);
|
||||
if (load_op->clears_loads_state.rt_load_mask & BITFIELD_BIT(i)) {
|
||||
result = pvr_setup_texture_state_words(cmd_buffer->device,
|
||||
&texture_states[texture_count],
|
||||
image_view);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
texture_count++;
|
||||
} else if (load_op->clears_loads_state.rt_clear_mask & BITFIELD_BIT(i)) {
|
||||
const uint32_t accum_fmt_size =
|
||||
pvr_get_pbe_accum_format_size_in_bytes(image_view->vk.format);
|
||||
|
||||
assert(next_clear_consts +
|
||||
vk_format_get_blocksize(image_view->vk.format) <=
|
||||
ARRAY_SIZE(hw_clear_value));
|
||||
|
||||
/* FIXME: do this at the point we store the clear values? */
|
||||
pvr_get_hw_clear_color(image_view->vk.format,
|
||||
clear_value->color,
|
||||
&hw_clear_value[next_clear_consts]);
|
||||
|
||||
next_clear_consts += DIV_ROUND_UP(accum_fmt_size, sizeof(uint32_t));
|
||||
}
|
||||
}
|
||||
|
||||
has_depth_load = load_op->clears_loads_state.rt_load_mask != 0;
|
||||
has_depth_clear = load_op->clears_loads_state.depth_clear_to_reg != -1;
|
||||
|
||||
assert(!(has_depth_clear && has_depth_load));
|
||||
|
||||
if (has_depth_load) {
|
||||
const struct pvr_render_pass_attachment *attachment;
|
||||
const struct pvr_image_view *image_view;
|
||||
|
||||
assert(*load_op->subpass->depth_stencil_attachment !=
|
||||
VK_ATTACHMENT_UNUSED);
|
||||
assert(!load_op->is_hw_object);
|
||||
attachment =
|
||||
&pass->attachments[*load_op->subpass->depth_stencil_attachment];
|
||||
|
||||
image_view = render_pass_info->attachments[attachment->index];
|
||||
|
||||
result = pvr_setup_texture_state_words(cmd_buffer->device,
|
||||
&texture_states[texture_count],
|
||||
image_view);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
texture_count++;
|
||||
} else if (has_depth_clear) {
|
||||
const struct pvr_render_pass_attachment *attachment;
|
||||
VkClearValue clear_value;
|
||||
|
||||
assert(*load_op->subpass->depth_stencil_attachment !=
|
||||
VK_ATTACHMENT_UNUSED);
|
||||
attachment =
|
||||
&pass->attachments[*load_op->subpass->depth_stencil_attachment];
|
||||
|
||||
clear_value = render_pass_info->clear_values[attachment->index];
|
||||
|
||||
assert(next_clear_consts < ARRAY_SIZE(hw_clear_value));
|
||||
hw_clear_value[next_clear_consts++] = fui(clear_value.depthStencil.depth);
|
||||
}
|
||||
|
||||
result = pvr_cmd_buffer_upload_general(cmd_buffer,
|
||||
&hw_clear_value[0],
|
||||
|
@@ -32,6 +32,7 @@
|
||||
#include "pvr_pds.h"
|
||||
#include "pvr_private.h"
|
||||
#include "pvr_usc_fragment_shader.h"
|
||||
#include "util/macros.h"
|
||||
#include "rogue/rogue.h"
|
||||
#include "vk_alloc.h"
|
||||
#include "vk_format.h"
|
||||
@@ -205,6 +206,9 @@ VkResult pvr_pds_unitex_state_program_create_and_upload(
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/* TODO: pvr_create_subpass_load_op() and pvr_create_render_load_op() are quite
|
||||
* similar. See if we can dedup them?
|
||||
*/
|
||||
static VkResult
|
||||
pvr_create_subpass_load_op(struct pvr_device *device,
|
||||
const VkAllocationCallbacks *allocator,
|
||||
@@ -226,19 +230,43 @@ pvr_create_subpass_load_op(struct pvr_device *device,
|
||||
if (!load_op)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
if (hw_subpass->z_replicate != -1 &&
|
||||
hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_LOAD) {
|
||||
pvr_finishme("Missing depth 'load' load op");
|
||||
load_op->load_depth = true;
|
||||
load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;
|
||||
|
||||
if (hw_subpass->z_replicate != -1) {
|
||||
const int32_t z_replicate = hw_subpass->z_replicate;
|
||||
|
||||
switch (hw_subpass->depth_initop) {
|
||||
case VK_ATTACHMENT_LOAD_OP_LOAD:
|
||||
assert(z_replicate < PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
|
||||
load_op->clears_loads_state.rt_load_mask = BITFIELD_BIT(z_replicate);
|
||||
load_op->clears_loads_state.dest_vk_format[z_replicate] =
|
||||
VK_FORMAT_D32_SFLOAT;
|
||||
break;
|
||||
|
||||
case VK_ATTACHMENT_LOAD_OP_CLEAR:
|
||||
load_op->clears_loads_state.depth_clear_to_reg = z_replicate;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert(subpass->color_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
|
||||
for (uint32_t i = 0; i < subpass->color_count; i++) {
|
||||
pvr_finishme("Missing color 'clear' and 'load' load ops");
|
||||
const uint32_t attachment_idx = subpass->color_attachments[i];
|
||||
|
||||
if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR)
|
||||
load_op->clear_mask |= 1U << i;
|
||||
else if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD)
|
||||
pvr_finishme("Missing 'load' load op");
|
||||
assert(attachment_idx < pass->attachment_count);
|
||||
load_op->clears_loads_state.dest_vk_format[i] =
|
||||
pass->attachments[attachment_idx].vk_format;
|
||||
|
||||
if (pass->attachments[attachment_idx].sample_count > 1)
|
||||
load_op->clears_loads_state.unresolved_msaa_mask = BITFIELD_BIT(i);
|
||||
|
||||
if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD)
|
||||
load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
|
||||
else if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR)
|
||||
load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
|
||||
}
|
||||
|
||||
load_op->is_hw_object = false;
|
||||
@@ -252,7 +280,8 @@ pvr_create_subpass_load_op(struct pvr_device *device,
|
||||
static VkResult
|
||||
pvr_create_render_load_op(struct pvr_device *device,
|
||||
const VkAllocationCallbacks *allocator,
|
||||
struct pvr_renderpass_hwsetup_render *hw_render,
|
||||
const struct pvr_render_pass *pass,
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render,
|
||||
struct pvr_load_op **const load_op_out)
|
||||
{
|
||||
struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc,
|
||||
@@ -263,13 +292,23 @@ pvr_create_render_load_op(struct pvr_device *device,
|
||||
if (!load_op)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;
|
||||
|
||||
assert(hw_render->color_init_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
|
||||
for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
|
||||
struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i];
|
||||
|
||||
if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR)
|
||||
load_op->clear_mask |= 1U << i;
|
||||
else if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD)
|
||||
pvr_finishme("Missing 'load' load op");
|
||||
assert(color_init->index < pass->attachment_count);
|
||||
load_op->clears_loads_state.dest_vk_format[i] =
|
||||
pass->attachments[color_init->index].vk_format;
|
||||
|
||||
if (pass->attachments[color_init->index].sample_count > 1)
|
||||
load_op->clears_loads_state.unresolved_msaa_mask = BITFIELD_BIT(i);
|
||||
|
||||
if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD)
|
||||
load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
|
||||
else if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR)
|
||||
load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
|
||||
}
|
||||
|
||||
load_op->is_hw_object = true;
|
||||
@@ -602,9 +641,6 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
|
||||
assert(!hw_render->load_op);
|
||||
|
||||
if (hw_render->color_init_count != 0U) {
|
||||
/* Add a dummy output register use to the HW render setup if it has no
|
||||
* output registers in use.
|
||||
*/
|
||||
if (!pvr_has_output_register_writes(hw_render)) {
|
||||
const uint32_t last = hw_render->init_setup.num_render_targets;
|
||||
struct usc_mrt_resource *mrt_resources;
|
||||
@@ -638,8 +674,11 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
|
||||
mrt_resources[last].mrt_desc.valid_mask[3U] = ~0;
|
||||
}
|
||||
|
||||
result =
|
||||
pvr_create_render_load_op(device, pAllocator, hw_render, &load_op);
|
||||
result = pvr_create_render_load_op(device,
|
||||
pAllocator,
|
||||
pass,
|
||||
hw_render,
|
||||
&load_op);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free2(&device->vk.alloc, pAllocator, load_op);
|
||||
goto err_load_op_destroy;
|
||||
|
@@ -1389,13 +1389,14 @@ struct pvr_render_pass {
|
||||
uint32_t max_tilebuffer_count;
|
||||
};
|
||||
|
||||
/* Max render targets for the clears loads state in load op.
 * To account for resolve attachments, double the color attachments.
 */
#define PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS (PVR_MAX_COLOR_ATTACHMENTS * 2)
|
||||
|
||||
struct pvr_load_op {
|
||||
bool is_hw_object;
|
||||
|
||||
uint32_t clear_mask;
|
||||
|
||||
bool load_depth;
|
||||
|
||||
struct pvr_bo *usc_frag_prog_bo;
|
||||
uint32_t const_shareds_count;
|
||||
uint32_t shareds_dest_offset;
|
||||
@@ -1410,8 +1411,42 @@ struct pvr_load_op {
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render;
|
||||
const struct pvr_render_subpass *subpass;
|
||||
};
|
||||
|
||||
/* TODO: We might not need to keep all of this around. Some stuff might just
|
||||
* be for the compiler to ingest which we can then discard.
|
||||
*/
|
||||
struct {
|
||||
uint16_t rt_clear_mask;
|
||||
uint16_t rt_load_mask;
|
||||
|
||||
uint16_t unresolved_msaa_mask;
|
||||
|
||||
/* The format to write to the output regs. */
|
||||
VkFormat dest_vk_format[PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS];
|
||||
|
||||
#define PVR_NO_DEPTH_CLEAR_TO_REG (-1)
|
||||
/* If >= 0, write a depth clear value to the specified pixel output. */
|
||||
int32_t depth_clear_to_reg;
|
||||
} clears_loads_state;
|
||||
};
|
||||
|
||||
#define CHECK_MASK_SIZE(_struct_type, _field_name, _nr_bits) \
|
||||
static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) * 8 >= \
|
||||
_nr_bits, \
|
||||
#_field_name " mask of struct " #_struct_type " too small")
|
||||
|
||||
CHECK_MASK_SIZE(pvr_load_op,
|
||||
clears_loads_state.rt_clear_mask,
|
||||
PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
|
||||
CHECK_MASK_SIZE(pvr_load_op,
|
||||
clears_loads_state.rt_load_mask,
|
||||
PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
|
||||
CHECK_MASK_SIZE(pvr_load_op,
|
||||
clears_loads_state.unresolved_msaa_mask,
|
||||
PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
|
||||
|
||||
#undef CHECK_MASK_SIZE
|
||||
|
||||
uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
|
||||
const struct pvr_physical_device *pdevice,
|
||||
uint32_t fs_common_size,
|
||||
|
@@ -64,7 +64,7 @@ static enum ROGUE_TEXSTATE_SWIZ pvr_get_hw_swizzle(VkComponentSwizzle comp,
|
||||
|
||||
VkResult
|
||||
pvr_pack_tex_state(struct pvr_device *device,
|
||||
struct pvr_texture_state_info *info,
|
||||
const struct pvr_texture_state_info *info,
|
||||
uint64_t state[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS])
|
||||
{
|
||||
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||
|
@@ -106,7 +106,7 @@ struct pvr_texture_state_info {
|
||||
|
||||
VkResult
|
||||
pvr_pack_tex_state(struct pvr_device *device,
|
||||
struct pvr_texture_state_info *info,
|
||||
const struct pvr_texture_state_info *info,
|
||||
uint64_t state[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS]);
|
||||
|
||||
#endif /* PVR_TEX_STATE_H */
|
||||
|
Reference in New Issue
Block a user