From 6e2ec310431c31da10ff3e596e173ccc7d3eee05 Mon Sep 17 00:00:00 2001 From: Karmjit Mahil Date: Tue, 4 Oct 2022 16:59:14 +0100 Subject: [PATCH] pvr: Add clear attachment programs in device. Signed-off-by: Karmjit Mahil Reviewed-by: Frank Binns Part-of: --- src/imagination/vulkan/pvr_clear.c | 223 ++++++++++++++++++ src/imagination/vulkan/pvr_private.h | 16 ++ .../vulkan/usc/programs/pvr_shader_factory.h | 51 +++- 3 files changed, 277 insertions(+), 13 deletions(-) diff --git a/src/imagination/vulkan/pvr_clear.c b/src/imagination/vulkan/pvr_clear.c index 5ecd233c321..5f39a3c0aea 100644 --- a/src/imagination/vulkan/pvr_clear.c +++ b/src/imagination/vulkan/pvr_clear.c @@ -29,6 +29,8 @@ #include "pvr_hardcode.h" #include "pvr_pds.h" #include "pvr_private.h" +#include "pvr_shader_factory.h" +#include "pvr_static_shaders.h" #include "vk_alloc.h" #include "vk_log.h" @@ -224,6 +226,218 @@ VkResult pvr_emit_ppp_from_template( return VK_SUCCESS; } +static VkResult +pvr_device_init_clear_attachment_programs(struct pvr_device *device) +{ + const uint32_t pds_prog_alignment = + MAX2(PVRX(TA_STATE_PDS_TEXUNICODEBASE_ADDR_ALIGNMENT), + PVRX(TA_STATE_PDS_SHADERBASE_ADDR_ALIGNMENT)); + struct pvr_device_static_clear_state *clear_state = + &device->static_clear_state; + const struct pvr_device_info *dev_info = &device->pdevice->dev_info; + uint32_t pds_texture_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT]; + uint32_t pds_pixel_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT]; + uint32_t usc_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT]; + uint64_t usc_upload_offset; + uint64_t pds_upload_offset; + uint32_t alloc_size = 0; + VkResult result; + uint8_t *ptr; + +#if !defined(NDEBUG) + uint32_t clear_attachment_info_count = 0; + + for (uint32_t i = 0; i < ARRAY_SIZE(clear_attachment_collection); i++) { + if (!clear_attachment_collection[i].info) + continue; + + clear_attachment_info_count++; + } + + assert(clear_attachment_info_count == 
PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT); +#endif + + /* Upload USC fragment shaders. */ + + for (uint32_t i = 0, offset_idx = 0; + i < ARRAY_SIZE(clear_attachment_collection); + i++) { + if (!clear_attachment_collection[i].info) + continue; + + usc_program_offsets[offset_idx] = alloc_size; + /* TODO: The compiler will likely give us a pre-aligned size for the USC + * shader so don't bother aligning here when it's hooked up. + */ + alloc_size += ALIGN_POT(clear_attachment_collection[i].size, 4); + + offset_idx++; + } + + result = pvr_bo_alloc(device, + device->heaps.usc_heap, + alloc_size, + 4, + PVR_BO_ALLOC_FLAG_CPU_MAPPED, + &clear_state->usc_clear_attachment_programs); + if (result != VK_SUCCESS) + return result; + + usc_upload_offset = + clear_state->usc_clear_attachment_programs->vma->dev_addr.addr - + device->heaps.usc_heap->base_addr.addr; + ptr = (uint8_t *)clear_state->usc_clear_attachment_programs->bo->map; + + for (uint32_t i = 0, offset_idx = 0; + i < ARRAY_SIZE(clear_attachment_collection); + i++) { + if (!clear_attachment_collection[i].info) + continue; + + memcpy(ptr + usc_program_offsets[offset_idx], + clear_attachment_collection[i].code, + clear_attachment_collection[i].size); + + offset_idx++; + } + + pvr_bo_cpu_unmap(device, clear_state->usc_clear_attachment_programs); + + /* Upload PDS programs. */ + + alloc_size = 0; + + for (uint32_t i = 0, offset_idx = 0; + i < ARRAY_SIZE(clear_attachment_collection); + i++) { + struct pvr_pds_pixel_shader_sa_program texture_pds_program; + struct pvr_pds_kickusc_program pixel_shader_pds_program; + uint32_t program_size; + + if (!clear_attachment_collection[i].info) + continue; + + /* Texture program to load colors. 
*/ + + texture_pds_program = (struct pvr_pds_pixel_shader_sa_program){ + .num_texture_dma_kicks = 1, + }; + + pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&texture_pds_program); + + pds_texture_program_offsets[offset_idx] = alloc_size; + alloc_size += + ALIGN_POT(texture_pds_program.code_size * 4, pds_prog_alignment); + + /* Pixel program to load fragment shader. */ + + pixel_shader_pds_program = (struct pvr_pds_kickusc_program){ 0 }; + + pvr_pds_setup_doutu(&pixel_shader_pds_program.usc_task_control, + usc_upload_offset + usc_program_offsets[offset_idx], + clear_attachment_collection[i].info->temps_required, + PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE), + false); + + pvr_pds_set_sizes_pixel_shader(&pixel_shader_pds_program); + + program_size = pixel_shader_pds_program.code_size + + pixel_shader_pds_program.data_size; + program_size *= sizeof(uint32_t); + + pds_pixel_program_offsets[offset_idx] = alloc_size; + alloc_size += ALIGN_POT(program_size, pds_prog_alignment); + + offset_idx++; + } + + result = pvr_bo_alloc(device, + device->heaps.pds_heap, + alloc_size, + pds_prog_alignment, + PVR_BO_ALLOC_FLAG_CPU_MAPPED, + &clear_state->pds_clear_attachment_programs); + if (result != VK_SUCCESS) { + pvr_bo_free(device, clear_state->usc_clear_attachment_programs); + return result; + } + + pds_upload_offset = + clear_state->pds_clear_attachment_programs->vma->dev_addr.addr - + device->heaps.pds_heap->base_addr.addr; + ptr = clear_state->pds_clear_attachment_programs->bo->map; + + for (uint32_t i = 0, offset_idx = 0; + i < ARRAY_SIZE(clear_attachment_collection); + i++) { + struct pvr_pds_pixel_shader_sa_program texture_pds_program; + struct pvr_pds_kickusc_program pixel_shader_pds_program; + + if (!clear_attachment_collection[i].info) { + clear_state->pds_clear_attachment_program_info[i] = + (struct pvr_pds_clear_attachment_program_info){ 0 }; + + continue; + } + + /* Texture program to load colors. 
*/ + + texture_pds_program = (struct pvr_pds_pixel_shader_sa_program){ + .num_texture_dma_kicks = 1, + }; + + pvr_pds_generate_pixel_shader_sa_code_segment( + &texture_pds_program, + (uint32_t *)(ptr + pds_texture_program_offsets[offset_idx])); + + /* Pixel program to load fragment shader. */ + + pixel_shader_pds_program = (struct pvr_pds_kickusc_program){ 0 }; + + pvr_pds_setup_doutu(&pixel_shader_pds_program.usc_task_control, + usc_upload_offset + usc_program_offsets[offset_idx], + clear_attachment_collection[i].info->temps_required, + PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE), + false); + + pvr_pds_generate_pixel_shader_program( + &pixel_shader_pds_program, + (uint32_t *)(ptr + pds_pixel_program_offsets[offset_idx])); + + /* Setup the PDS program info. */ + + pvr_pds_set_sizes_pixel_shader_sa_texture_data(&texture_pds_program, + dev_info); + + clear_state->pds_clear_attachment_program_info[i] = + (struct pvr_pds_clear_attachment_program_info){ + .texture_program_offset = PVR_DEV_ADDR( + pds_upload_offset + pds_texture_program_offsets[offset_idx]), + .pixel_program_offset = PVR_DEV_ADDR( + pds_upload_offset + pds_pixel_program_offsets[offset_idx]), + + .texture_program_pds_temps_count = texture_pds_program.temps_used, + .texture_program_data_size = texture_pds_program.data_size, + }; + + offset_idx++; + } + + pvr_bo_cpu_unmap(device, clear_state->pds_clear_attachment_programs); + + return VK_SUCCESS; +} + +static void +pvr_device_finish_clear_attachment_programs(struct pvr_device *device) +{ + struct pvr_device_static_clear_state *clear_state = + &device->static_clear_state; + + pvr_bo_free(device, clear_state->usc_clear_attachment_programs); + pvr_bo_free(device, clear_state->pds_clear_attachment_programs); +} + /** * \brief Generate and uploads vertices required to clear the rect area. 
* @@ -362,8 +576,15 @@ VkResult pvr_device_init_graphics_static_clear_state(struct pvr_device *device) 1, state->large_clear_vdm_words); + result = pvr_device_init_clear_attachment_programs(device); + if (result != VK_SUCCESS) + goto err_free_pds_program; + return VK_SUCCESS; +err_free_pds_program: + pvr_bo_free(device, state->pds.pvr_bo); + err_free_vertices_buffer: pvr_bo_free(device, state->vertices_bo); @@ -380,6 +601,8 @@ void pvr_device_finish_graphics_static_clear_state(struct pvr_device *device) { struct pvr_device_static_clear_state *state = &device->static_clear_state; + pvr_device_finish_clear_attachment_programs(device); + pvr_bo_free(device, state->pds.pvr_bo); pvr_bo_free(device, state->vertices_bo); pvr_bo_free(device, state->usc_vertex_shader_bo); diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index 5a4c277f44b..83a7f5c9599 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -46,6 +46,7 @@ #include "pvr_job_render.h" #include "pvr_limits.h" #include "pvr_pds.h" +#include "pvr_shader_factory.h" #include "pvr_types.h" #include "pvr_winsys.h" #include "rogue/rogue.h" @@ -346,6 +347,21 @@ struct pvr_device { uint32_t vdm_words[PVR_CLEAR_VDM_STATE_DWORD_COUNT]; uint32_t large_clear_vdm_words[PVR_CLEAR_VDM_STATE_DWORD_COUNT]; + + struct pvr_bo *usc_clear_attachment_programs; + struct pvr_bo *pds_clear_attachment_programs; + /* TODO: See if we can use PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT to save some + * memory. + */ + struct pvr_pds_clear_attachment_program_info { + pvr_dev_addr_t texture_program_offset; + pvr_dev_addr_t pixel_program_offset; + + uint32_t texture_program_pds_temps_count; + /* Size in dwords. 
*/ + uint32_t texture_program_data_size; + } pds_clear_attachment_program_info + [PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES]; } static_clear_state; struct { diff --git a/src/imagination/vulkan/usc/programs/pvr_shader_factory.h b/src/imagination/vulkan/usc/programs/pvr_shader_factory.h index be3548b3c29..56710a723ce 100644 --- a/src/imagination/vulkan/usc/programs/pvr_shader_factory.h +++ b/src/imagination/vulkan/usc/programs/pvr_shader_factory.h @@ -25,6 +25,9 @@ #define PVR_SHADER_FACTORY_H #include +#include + +#include "util/bitpack_helpers.h" /* Occlusion query availability writes. */ enum pvr_query_availability_write_pool_const { @@ -78,22 +81,44 @@ enum pvr_clear_attachment_const { */ #define PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES 64 -#define PVR_CLEAR_ATTACHMENT_PROGRAM_DWORDS_SHIFT (4U) -#define PVR_CLEAR_ATTACHMENT_PROGRAM_DWORDS_SETMASK (0x30U) -#define PVR_CLEAR_ATTACHMENT_PROGRAM_OFFSET_SHIFT (1U) -#define PVR_CLEAR_ATTACHMENT_PROGRAM_OFFSET_SETMASK (0x0EU) -#define PVR_CLEAR_ATTACHMENT_PROGRAM_DEST_SHIFT (0U) -#define PVR_CLEAR_ATTACHMENT_PROGRAM_DEST_SETMASK (0x01U) - +/** + * \brief Returns the index of the clear attachment USC program. + * + * For shaders which use output registers "dword_count" is essentially the + * count of output registers to use, and "offset" is the first output reg to + * use. E.g. dword_count 3, offset 1, will use o1, o2, o3. + * + * For shaders which use tile buffers as the destination "dword_count" is the + * amount of dwords to write to the tile buffer and "offset" is the offset + * at which to start writing. 
+ */ static inline uint32_t -pvr_get_clear_attachment_program_index(uint32_t dwords, +pvr_get_clear_attachment_program_index(uint32_t dword_count, uint32_t offset, - uint8_t tile_buffer) + bool uses_tile_buffer) { - return (((dwords - 1) << PVR_CLEAR_ATTACHMENT_PROGRAM_DWORDS_SHIFT) | - (offset << PVR_CLEAR_ATTACHMENT_PROGRAM_OFFSET_SHIFT) | - (tile_buffer << PVR_CLEAR_ATTACHMENT_PROGRAM_DEST_SHIFT)) & - 0x3f; + /* dest - Clear on chip or in memory. + * offset - Clear offset 0..7. + * dword_count - Clear from 1..4 dwords. + */ + const uint32_t dest_start = 0; + const uint32_t dest_end = 0; + + const uint32_t offset_start = 1; + const uint32_t offset_end = 3; + + const uint32_t dword_count_start = 4; + const uint32_t dword_count_end = 5; + + uint32_t idx = 0; + + dword_count -= 1; + + idx |= util_bitpack_uint(uses_tile_buffer, dest_start, dest_end); + idx |= util_bitpack_uint(offset, offset_start, offset_end); + idx |= util_bitpack_uint(dword_count, dword_count_start, dword_count_end); + + return idx; } #endif /* PVR_SHADER_FACTORY_H */