diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c
index 4ccfc678133..509a180c1fc 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.c
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -43,6 +43,7 @@
 #include "pvr_limits.h"
 #include "pvr_pds.h"
 #include "pvr_private.h"
+#include "pvr_tex_state.h"
 #include "pvr_types.h"
 #include "pvr_winsys.h"
 #include "util/bitscan.h"
@@ -51,6 +52,7 @@
 #include "util/list.h"
 #include "util/macros.h"
 #include "util/u_dynarray.h"
+#include "util/u_math.h"
 #include "util/u_pack_color.h"
 #include "vk_alloc.h"
 #include "vk_command_buffer.h"
@@ -601,6 +603,89 @@ err_csb_finish:
    return result;
 }
 
+struct pvr_combined_image_sampler_descriptor {
+   /* | TEXSTATE_IMAGE_WORD0 | TEXSTATE_{STRIDE_,}IMAGE_WORD1 | */
+   uint64_t image[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
+   union pvr_sampler_descriptor sampler;
+};
+
+#define CHECK_STRUCT_FIELD_SIZE(_struct_type, _field_name, _size)      \
+   static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) == \
+                    (_size),                                           \
+                 "Size of '" #_field_name "' in '" #_struct_type       \
+                 "' differs from expected")
+
+CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
+                        image,
+                        ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t));
+CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
+                        image,
+                        PVR_IMAGE_DESCRIPTOR_SIZE * sizeof(uint32_t));
+CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
+                        image,
+                        (pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
+                         pvr_cmd_length(TEXSTATE_IMAGE_WORD1)) *
+                           sizeof(uint32_t));
+CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
+                        image,
+                        (pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
+                         pvr_cmd_length(TEXSTATE_STRIDE_IMAGE_WORD1)) *
+                           sizeof(uint32_t));
+
+#undef CHECK_STRUCT_FIELD_SIZE
+
+/* Fills descriptor with the texture state words and a sampler word for
+ * image_view: the image words are packed by pvr_pack_tex_state() for a
+ * single mip level, and the sampler uses non-normalized coordinates,
+ * clamp-to-edge addressing and point filtering.
+ *
+ * Returns VK_SUCCESS, or the error reported by pvr_pack_tex_state().
+ */
+static VkResult pvr_setup_texture_state_words(
+   struct pvr_device *device,
+   struct pvr_combined_image_sampler_descriptor *descriptor,
+   const struct pvr_image_view *image_view)
+{
+   const struct pvr_image *image = vk_to_pvr_image(image_view->vk.image);
+   struct pvr_texture_state_info info = {
+      .format = image_view->vk.format,
+      .mem_layout = image->memlayout,
+      .type = image_view->vk.view_type,
+      .is_cube = image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE ||
+                 image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY,
+      .tex_state_type = PVR_TEXTURE_STATE_SAMPLE,
+      .extent = image_view->vk.extent,
+      .mip_levels = 1,
+      .sample_count = image_view->vk.image->samples,
+      .stride = image->physical_extent.width,
+      .addr = image->dev_addr,
+   };
+   const uint8_t *const swizzle = pvr_get_format_swizzle(info.format);
+   VkResult result;
+
+   memcpy(&info.swizzle, swizzle, sizeof(info.swizzle));
+
+   /* TODO: Can we use image_view->texture_state instead of generating here? */
+   result = pvr_pack_tex_state(device, &info, descriptor->image);
+   if (result != VK_SUCCESS)
+      return result;
+
+   descriptor->sampler = (union pvr_sampler_descriptor){ 0 };
+
+   pvr_csb_pack (&descriptor->sampler.data.sampler_word,
+                 TEXSTATE_SAMPLER,
+                 sampler) {
+      sampler.non_normalized_coords = true;
+      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
+      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
+      sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
+      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
+      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
+   }
+
+   return VK_SUCCESS;
+}
+
 static VkResult
 pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
                                         const struct pvr_load_op *load_op,
@@ -612,29 +697,128 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
    const struct pvr_renderpass_hwsetup_render *hw_render = load_op->hw_render;
    const struct pvr_renderpass_colorinit *color_init =
       &hw_render->color_init[0];
-   const struct pvr_render_pass_attachment *attachment =
-      &pass->attachments[color_init->index];
    const VkClearValue *clear_value =
       &render_pass_info->clear_values[color_init->index];
-   uint32_t hw_clear_value[PVR_CLEAR_COLOR_ARRAY_SIZE];
+   uint32_t attachment_count;
    struct pvr_bo *clear_bo;
+   bool has_depth_clear;
+   bool has_depth_load;
    VkResult result;
 
-   pvr_finishme("Add missing load op data support");
+   /* These are only setup and never used for now. These will need to be
+    * uploaded into a buffer based on some compiler info.
+    */
+   /* TODO: Remove the above comment once the compiler is hooked up and we're
+    * setting up + uploading the buffer.
+    */
+   struct pvr_combined_image_sampler_descriptor
+      texture_states[PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS];
+   uint32_t texture_count = 0;
+   uint32_t hw_clear_value[PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS *
+                           PVR_CLEAR_COLOR_ARRAY_SIZE];
+   uint32_t next_clear_consts = 0;
 
-   assert(load_op->is_hw_object);
-   assert(hw_render->color_init_count == 1);
+   if (load_op->is_hw_object)
+      attachment_count = load_op->hw_render->color_init_count;
+   else
+      attachment_count = load_op->subpass->color_count;
 
-   assert(vk_format_get_blocksize(attachment->vk_format) <=
-          sizeof(hw_clear_value));
+   for (uint32_t i = 0; i < attachment_count; i++) {
+      struct pvr_image_view *image_view;
+      uint32_t attachment_idx;
 
-   /* FIXME: add support for VK_ATTACHMENT_LOAD_OP_LOAD. */
-   assert(color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR);
+      if (load_op->is_hw_object)
+         attachment_idx = load_op->hw_render->color_init[i].index;
+      else
+         attachment_idx = load_op->subpass->color_attachments[i];
 
-   /* FIXME: do this at the point we store the clear values? */
-   pvr_get_hw_clear_color(attachment->vk_format,
-                          clear_value->color,
-                          hw_clear_value);
+      image_view = render_pass_info->attachments[attachment_idx];
+
+      assert((load_op->clears_loads_state.rt_load_mask &
+              load_op->clears_loads_state.rt_clear_mask) == 0);
+      if (load_op->clears_loads_state.rt_load_mask & BITFIELD_BIT(i)) {
+         result = pvr_setup_texture_state_words(cmd_buffer->device,
+                                                &texture_states[texture_count],
+                                                image_view);
+         if (result != VK_SUCCESS)
+            return result;
+
+         texture_count++;
+      } else if (load_op->clears_loads_state.rt_clear_mask & BITFIELD_BIT(i)) {
+         /* Clear values are indexed by attachment, so look this one up by
+          * attachment_idx rather than reusing color_init[0]'s clear value.
+          */
+         const VkClearValue *const attachment_clear_value =
+            &render_pass_info->clear_values[attachment_idx];
+         const uint32_t accum_fmt_size =
+            pvr_get_pbe_accum_format_size_in_bytes(image_view->vk.format);
+
+         assert(next_clear_consts +
+                   vk_format_get_blocksize(image_view->vk.format) <=
+                ARRAY_SIZE(hw_clear_value));
+
+         /* FIXME: do this at the point we store the clear values? */
+         pvr_get_hw_clear_color(image_view->vk.format,
+                                attachment_clear_value->color,
+                                &hw_clear_value[next_clear_consts]);
+
+         next_clear_consts += DIV_ROUND_UP(accum_fmt_size, sizeof(uint32_t));
+      }
+   }
+
+   /* pvr_create_subpass_load_op() marks a depth load by giving its render
+    * target a D32_SFLOAT destination format. rt_load_mask cannot be used
+    * here: it is also set for colour loads, including those of HW render
+    * load ops, which the depth load path below asserts against.
+    */
+   has_depth_load = false;
+   for (uint32_t i = 0;
+        i < ARRAY_SIZE(load_op->clears_loads_state.dest_vk_format);
+        i++) {
+      if (load_op->clears_loads_state.dest_vk_format[i] ==
+          VK_FORMAT_D32_SFLOAT) {
+         has_depth_load = true;
+         break;
+      }
+   }
+
+   has_depth_clear = load_op->clears_loads_state.depth_clear_to_reg != -1;
+
+   assert(!(has_depth_clear && has_depth_load));
+
+   if (has_depth_load) {
+      const struct pvr_render_pass_attachment *attachment;
+      const struct pvr_image_view *image_view;
+
+      assert(*load_op->subpass->depth_stencil_attachment !=
+             VK_ATTACHMENT_UNUSED);
+      assert(!load_op->is_hw_object);
+      attachment =
+         &pass->attachments[*load_op->subpass->depth_stencil_attachment];
+
+      image_view = render_pass_info->attachments[attachment->index];
+
+      result = pvr_setup_texture_state_words(cmd_buffer->device,
+                                             &texture_states[texture_count],
+                                             image_view);
+      if (result != VK_SUCCESS)
+         return result;
+
+      texture_count++;
+   } else if (has_depth_clear) {
+      const struct pvr_render_pass_attachment *attachment;
+      VkClearValue clear_value;
+
+      assert(*load_op->subpass->depth_stencil_attachment !=
+             VK_ATTACHMENT_UNUSED);
+      attachment =
+         &pass->attachments[*load_op->subpass->depth_stencil_attachment];
+
+      clear_value = render_pass_info->clear_values[attachment->index];
+
+      assert(next_clear_consts < ARRAY_SIZE(hw_clear_value));
+      hw_clear_value[next_clear_consts++] = fui(clear_value.depthStencil.depth);
+   }
 
    result = pvr_cmd_buffer_upload_general(cmd_buffer,
                                           &hw_clear_value[0],
diff --git a/src/imagination/vulkan/pvr_pass.c b/src/imagination/vulkan/pvr_pass.c
index f9702df5ecf..2e3ccf2d956 100644
--- a/src/imagination/vulkan/pvr_pass.c
+++ b/src/imagination/vulkan/pvr_pass.c
@@ -32,6 +32,7 @@
 #include "pvr_pds.h"
 #include "pvr_private.h"
 #include "pvr_usc_fragment_shader.h"
+#include "util/macros.h"
 #include "rogue/rogue.h"
 #include "vk_alloc.h"
 #include "vk_format.h"
@@ -205,6 +206,9 @@ VkResult pvr_pds_unitex_state_program_create_and_upload(
    return VK_SUCCESS;
 }
 
+/* TODO: pvr_create_subpass_load_op() and pvr_create_render_load_op() are quite
+ * similar. See if we can dedup them?
+ */
 static VkResult
 pvr_create_subpass_load_op(struct pvr_device *device,
                            const VkAllocationCallbacks *allocator,
@@ -226,19 +230,43 @@ pvr_create_subpass_load_op(struct pvr_device *device,
    if (!load_op)
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   if (hw_subpass->z_replicate != -1 &&
-       hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_LOAD) {
-      pvr_finishme("Missing depth 'load' load op");
-      load_op->load_depth = true;
+   load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;
+
+   if (hw_subpass->z_replicate != -1) {
+      const int32_t z_replicate = hw_subpass->z_replicate;
+
+      switch (hw_subpass->depth_initop) {
+      case VK_ATTACHMENT_LOAD_OP_LOAD:
+         assert(z_replicate < PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
+         load_op->clears_loads_state.rt_load_mask = BITFIELD_BIT(z_replicate);
+         load_op->clears_loads_state.dest_vk_format[z_replicate] =
+            VK_FORMAT_D32_SFLOAT;
+         break;
+
+      case VK_ATTACHMENT_LOAD_OP_CLEAR:
+         load_op->clears_loads_state.depth_clear_to_reg = z_replicate;
+         break;
+
+      default:
+         break;
+      }
    }
 
+   assert(subpass->color_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
    for (uint32_t i = 0; i < subpass->color_count; i++) {
-      pvr_finishme("Missing color 'clear' and 'load' load ops");
+      const uint32_t attachment_idx = subpass->color_attachments[i];
 
-      if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR)
-         load_op->clear_mask |= 1U << i;
-      else if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD)
-         pvr_finishme("Missing 'load' load op");
+      assert(attachment_idx < pass->attachment_count);
+      load_op->clears_loads_state.dest_vk_format[i] =
+         pass->attachments[attachment_idx].vk_format;
+
+      if (pass->attachments[attachment_idx].sample_count > 1)
+         load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i);
+
+      if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD)
+         load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
+      else if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR)
+         load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
    }
 
    load_op->is_hw_object = false;
@@ -252,7 +280,8 @@ pvr_create_subpass_load_op(struct pvr_device *device,
 static VkResult
 pvr_create_render_load_op(struct pvr_device *device,
                           const VkAllocationCallbacks *allocator,
-                          struct pvr_renderpass_hwsetup_render *hw_render,
+                          const struct pvr_render_pass *pass,
+                          const struct pvr_renderpass_hwsetup_render *hw_render,
                           struct pvr_load_op **const load_op_out)
 {
    struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc,
@@ -263,13 +292,23 @@ pvr_create_render_load_op(struct pvr_device *device,
    if (!load_op)
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
+   load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;
+
+   assert(hw_render->color_init_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
    for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
       struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i];
 
-      if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR)
-         load_op->clear_mask |= 1U << i;
-      else if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD)
-         pvr_finishme("Missing 'load' load op");
+      assert(color_init->index < pass->attachment_count);
+      load_op->clears_loads_state.dest_vk_format[i] =
+         pass->attachments[color_init->index].vk_format;
+
+      if (pass->attachments[color_init->index].sample_count > 1)
+         load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i);
+
+      if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD)
+         load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
+      else if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR)
+         load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
    }
 
    load_op->is_hw_object = true;
@@ -602,9 +641,6 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
       assert(!hw_render->load_op);
 
       if (hw_render->color_init_count != 0U) {
-         /* Add a dummy output register use to the HW render setup if it has no
-          * output registers in use.
-          */
          if (!pvr_has_output_register_writes(hw_render)) {
             const uint32_t last = hw_render->init_setup.num_render_targets;
             struct usc_mrt_resource *mrt_resources;
@@ -638,8 +674,11 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
             mrt_resources[last].mrt_desc.valid_mask[3U] = ~0;
          }
 
-         result =
-            pvr_create_render_load_op(device, pAllocator, hw_render, &load_op);
+         result = pvr_create_render_load_op(device,
+                                            pAllocator,
+                                            pass,
+                                            hw_render,
+                                            &load_op);
          if (result != VK_SUCCESS) {
             vk_free2(&device->vk.alloc, pAllocator, load_op);
             goto err_load_op_destroy;
diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h
index e71ee78fe98..27e6579c3de 100644
--- a/src/imagination/vulkan/pvr_private.h
+++ b/src/imagination/vulkan/pvr_private.h
@@ -1389,13 +1389,14 @@ struct pvr_render_pass {
    uint32_t max_tilebuffer_count;
 };
 
+/* Max render targets for the clears loads state in load op.
+ * To account for resolve attachments, double the color attachments.
+ */
+#define PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS (PVR_MAX_COLOR_ATTACHMENTS * 2)
+
 struct pvr_load_op {
    bool is_hw_object;
 
-   uint32_t clear_mask;
-
-   bool load_depth;
-
    struct pvr_bo *usc_frag_prog_bo;
    uint32_t const_shareds_count;
    uint32_t shareds_dest_offset;
@@ -1410,8 +1411,42 @@ struct pvr_load_op {
       const struct pvr_renderpass_hwsetup_render *hw_render;
       const struct pvr_render_subpass *subpass;
    };
+
+   /* TODO: We might not need to keep all of this around. Some stuff might just
+    * be for the compiler to ingest which we can then discard.
+    */
+   struct {
+      uint16_t rt_clear_mask;
+      uint16_t rt_load_mask;
+
+      uint16_t unresolved_msaa_mask;
+
+      /* The format to write to the output regs. */
+      VkFormat dest_vk_format[PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS];
+
+#define PVR_NO_DEPTH_CLEAR_TO_REG (-1)
+      /* If >= 0, write a depth clear value to the specified pixel output. */
+      int32_t depth_clear_to_reg;
+   } clears_loads_state;
 };
 
+#define CHECK_MASK_SIZE(_struct_type, _field_name, _nr_bits)               \
+   static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) * 8 >= \
+                    _nr_bits,                                              \
+                 #_field_name " mask of struct " #_struct_type " too small")
+
+CHECK_MASK_SIZE(pvr_load_op,
+                clears_loads_state.rt_clear_mask,
+                PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
+CHECK_MASK_SIZE(pvr_load_op,
+                clears_loads_state.rt_load_mask,
+                PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
+CHECK_MASK_SIZE(pvr_load_op,
+                clears_loads_state.unresolved_msaa_mask,
+                PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
+
+#undef CHECK_MASK_SIZE
+
 uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
    const struct pvr_physical_device *pdevice,
    uint32_t fs_common_size,
diff --git a/src/imagination/vulkan/pvr_tex_state.c b/src/imagination/vulkan/pvr_tex_state.c
index 1d99200f70f..c2a2f7601d7 100644
--- a/src/imagination/vulkan/pvr_tex_state.c
+++ b/src/imagination/vulkan/pvr_tex_state.c
@@ -64,7 +64,7 @@ static enum ROGUE_TEXSTATE_SWIZ pvr_get_hw_swizzle(VkComponentSwizzle comp,
 
 VkResult
 pvr_pack_tex_state(struct pvr_device *device,
-                   struct pvr_texture_state_info *info,
+                   const struct pvr_texture_state_info *info,
                    uint64_t state[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS])
 {
    const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
diff --git a/src/imagination/vulkan/pvr_tex_state.h b/src/imagination/vulkan/pvr_tex_state.h
index 66f872eb9a6..24e199b1014 100644
--- a/src/imagination/vulkan/pvr_tex_state.h
+++ b/src/imagination/vulkan/pvr_tex_state.h
@@ -106,7 +106,7 @@ struct pvr_texture_state_info {
 
 VkResult
 pvr_pack_tex_state(struct pvr_device *device,
-                   struct pvr_texture_state_info *info,
+                   const struct pvr_texture_state_info *info,
                    uint64_t state[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS]);
 
 #endif /* PVR_TEX_STATE_H */