pvr: Add clear attachment programs in device.

Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20055>
This commit is contained in:
Karmjit Mahil
2022-10-04 16:59:14 +01:00
committed by Marge Bot
parent 72151aa426
commit 6e2ec31043
3 changed files with 277 additions and 13 deletions

View File

@@ -29,6 +29,8 @@
#include "pvr_hardcode.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_shader_factory.h"
#include "pvr_static_shaders.h"
#include "vk_alloc.h"
#include "vk_log.h"
@@ -224,6 +226,218 @@ VkResult pvr_emit_ppp_from_template(
return VK_SUCCESS;
}
static VkResult
pvr_device_init_clear_attachment_programs(struct pvr_device *device)
{
const uint32_t pds_prog_alignment =
MAX2(PVRX(TA_STATE_PDS_TEXUNICODEBASE_ADDR_ALIGNMENT),
PVRX(TA_STATE_PDS_SHADERBASE_ADDR_ALIGNMENT));
struct pvr_device_static_clear_state *clear_state =
&device->static_clear_state;
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
uint32_t pds_texture_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT];
uint32_t pds_pixel_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT];
uint32_t usc_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT];
uint64_t usc_upload_offset;
uint64_t pds_upload_offset;
uint32_t alloc_size = 0;
VkResult result;
uint8_t *ptr;
#if !defined(NDEBUG)
uint32_t clear_attachment_info_count = 0;
for (uint32_t i = 0; i < ARRAY_SIZE(clear_attachment_collection); i++) {
if (!clear_attachment_collection[i].info)
continue;
clear_attachment_info_count++;
}
assert(clear_attachment_info_count == PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT);
#endif
/* Upload USC fragment shaders. */
for (uint32_t i = 0, offset_idx = 0;
i < ARRAY_SIZE(clear_attachment_collection);
i++) {
if (!clear_attachment_collection[i].info)
continue;
usc_program_offsets[offset_idx] = alloc_size;
/* TODO: The compiler will likely give us a pre-aligned size for the USC
* shader so don't bother aligning here when it's hooked up.
*/
alloc_size += ALIGN_POT(clear_attachment_collection[i].size, 4);
offset_idx++;
}
result = pvr_bo_alloc(device,
device->heaps.usc_heap,
alloc_size,
4,
PVR_BO_ALLOC_FLAG_CPU_MAPPED,
&clear_state->usc_clear_attachment_programs);
if (result != VK_SUCCESS)
return result;
usc_upload_offset =
clear_state->usc_clear_attachment_programs->vma->dev_addr.addr -
device->heaps.usc_heap->base_addr.addr;
ptr = (uint8_t *)clear_state->usc_clear_attachment_programs->bo->map;
for (uint32_t i = 0, offset_idx = 0;
i < ARRAY_SIZE(clear_attachment_collection);
i++) {
if (!clear_attachment_collection[i].info)
continue;
memcpy(ptr + usc_program_offsets[offset_idx],
clear_attachment_collection[i].code,
clear_attachment_collection[i].size);
offset_idx++;
}
pvr_bo_cpu_unmap(device, clear_state->usc_clear_attachment_programs);
/* Upload PDS programs. */
alloc_size = 0;
for (uint32_t i = 0, offset_idx = 0;
i < ARRAY_SIZE(clear_attachment_collection);
i++) {
struct pvr_pds_pixel_shader_sa_program texture_pds_program;
struct pvr_pds_kickusc_program pixel_shader_pds_program;
uint32_t program_size;
if (!clear_attachment_collection[i].info)
continue;
/* Texture program to load colors. */
texture_pds_program = (struct pvr_pds_pixel_shader_sa_program){
.num_texture_dma_kicks = 1,
};
pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&texture_pds_program);
pds_texture_program_offsets[offset_idx] = alloc_size;
alloc_size +=
ALIGN_POT(texture_pds_program.code_size * 4, pds_prog_alignment);
/* Pixel program to load fragment shader. */
pixel_shader_pds_program = (struct pvr_pds_kickusc_program){ 0 };
pvr_pds_setup_doutu(&pixel_shader_pds_program.usc_task_control,
usc_upload_offset + usc_program_offsets[offset_idx],
clear_attachment_collection[i].info->temps_required,
PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
false);
pvr_pds_set_sizes_pixel_shader(&pixel_shader_pds_program);
program_size = pixel_shader_pds_program.code_size +
pixel_shader_pds_program.data_size;
program_size *= sizeof(uint32_t);
pds_pixel_program_offsets[offset_idx] = alloc_size;
alloc_size += ALIGN_POT(program_size, pds_prog_alignment);
offset_idx++;
}
result = pvr_bo_alloc(device,
device->heaps.pds_heap,
alloc_size,
pds_prog_alignment,
PVR_BO_ALLOC_FLAG_CPU_MAPPED,
&clear_state->pds_clear_attachment_programs);
if (result != VK_SUCCESS) {
pvr_bo_free(device, clear_state->usc_clear_attachment_programs);
return result;
}
pds_upload_offset =
clear_state->pds_clear_attachment_programs->vma->dev_addr.addr -
device->heaps.pds_heap->base_addr.addr;
ptr = clear_state->pds_clear_attachment_programs->bo->map;
for (uint32_t i = 0, offset_idx = 0;
i < ARRAY_SIZE(clear_attachment_collection);
i++) {
struct pvr_pds_pixel_shader_sa_program texture_pds_program;
struct pvr_pds_kickusc_program pixel_shader_pds_program;
if (!clear_attachment_collection[i].info) {
clear_state->pds_clear_attachment_program_info[i] =
(struct pvr_pds_clear_attachment_program_info){ 0 };
continue;
}
/* Texture program to load colors. */
texture_pds_program = (struct pvr_pds_pixel_shader_sa_program){
.num_texture_dma_kicks = 1,
};
pvr_pds_generate_pixel_shader_sa_code_segment(
&texture_pds_program,
(uint32_t *)(ptr + pds_texture_program_offsets[offset_idx]));
/* Pixel program to load fragment shader. */
pixel_shader_pds_program = (struct pvr_pds_kickusc_program){ 0 };
pvr_pds_setup_doutu(&pixel_shader_pds_program.usc_task_control,
usc_upload_offset + usc_program_offsets[offset_idx],
clear_attachment_collection[i].info->temps_required,
PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
false);
pvr_pds_generate_pixel_shader_program(
&pixel_shader_pds_program,
(uint32_t *)(ptr + pds_pixel_program_offsets[offset_idx]));
/* Setup the PDS program info. */
pvr_pds_set_sizes_pixel_shader_sa_texture_data(&texture_pds_program,
dev_info);
clear_state->pds_clear_attachment_program_info[i] =
(struct pvr_pds_clear_attachment_program_info){
.texture_program_offset = PVR_DEV_ADDR(
pds_upload_offset + pds_texture_program_offsets[offset_idx]),
.pixel_program_offset = PVR_DEV_ADDR(
pds_upload_offset + pds_pixel_program_offsets[offset_idx]),
.texture_program_pds_temps_count = texture_pds_program.temps_used,
.texture_program_data_size = texture_pds_program.data_size,
};
offset_idx++;
}
pvr_bo_cpu_unmap(device, clear_state->pds_clear_attachment_programs);
return VK_SUCCESS;
}
/**
 * \brief Frees the buffers holding the clear attachment programs.
 *
 * Releases the USC and then the PDS buffer stored in
 * device->static_clear_state by pvr_device_init_clear_attachment_programs().
 *
 * \param[in] device Device whose clear attachment programs are freed.
 */
static void
pvr_device_finish_clear_attachment_programs(struct pvr_device *device)
{
   pvr_bo_free(device,
               device->static_clear_state.usc_clear_attachment_programs);
   pvr_bo_free(device,
               device->static_clear_state.pds_clear_attachment_programs);
}
/**
* \brief Generates and uploads vertices required to clear the rect area.
*
@@ -362,8 +576,15 @@ VkResult pvr_device_init_graphics_static_clear_state(struct pvr_device *device)
1,
state->large_clear_vdm_words);
result = pvr_device_init_clear_attachment_programs(device);
if (result != VK_SUCCESS)
goto err_free_pds_program;
return VK_SUCCESS;
err_free_pds_program:
pvr_bo_free(device, state->pds.pvr_bo);
err_free_vertices_buffer:
pvr_bo_free(device, state->vertices_bo);
@@ -380,6 +601,8 @@ void pvr_device_finish_graphics_static_clear_state(struct pvr_device *device)
{
struct pvr_device_static_clear_state *state = &device->static_clear_state;
pvr_device_finish_clear_attachment_programs(device);
pvr_bo_free(device, state->pds.pvr_bo);
pvr_bo_free(device, state->vertices_bo);
pvr_bo_free(device, state->usc_vertex_shader_bo);

View File

@@ -46,6 +46,7 @@
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_pds.h"
#include "pvr_shader_factory.h"
#include "pvr_types.h"
#include "pvr_winsys.h"
#include "rogue/rogue.h"
@@ -346,6 +347,21 @@ struct pvr_device {
uint32_t vdm_words[PVR_CLEAR_VDM_STATE_DWORD_COUNT];
uint32_t large_clear_vdm_words[PVR_CLEAR_VDM_STATE_DWORD_COUNT];
struct pvr_bo *usc_clear_attachment_programs;
struct pvr_bo *pds_clear_attachment_programs;
/* TODO: See if we can use PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT to save some
* memory.
*/
/* Per-program PDS information for the clear attachment programs.
 *
 * Filled in by pvr_device_init_clear_attachment_programs(). The array uses
 * the same index as clear_attachment_collection; entries for which the
 * collection has no program are zeroed.
 */
struct pvr_pds_clear_attachment_program_info {
/* Location of the PDS texture program: its device address minus the base
 * address of the PDS heap.
 */
pvr_dev_addr_t texture_program_offset;
/* Location of the PDS pixel program: its device address minus the base
 * address of the PDS heap.
 */
pvr_dev_addr_t pixel_program_offset;
/* Copied from the texture PDS program's temps_used. */
uint32_t texture_program_pds_temps_count;
/* Size in dwords. Copied from the texture PDS program's data_size. */
uint32_t texture_program_data_size;
} pds_clear_attachment_program_info
[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES];
} static_clear_state;
struct {

View File

@@ -25,6 +25,9 @@
#define PVR_SHADER_FACTORY_H
#include <stdint.h>
#include <stdbool.h>
#include "util/bitpack_helpers.h"
/* Occlusion query availability writes. */
enum pvr_query_availability_write_pool_const {
@@ -78,22 +81,44 @@ enum pvr_clear_attachment_const {
*/
#define PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES 64
#define PVR_CLEAR_ATTACHMENT_PROGRAM_DWORDS_SHIFT (4U)
#define PVR_CLEAR_ATTACHMENT_PROGRAM_DWORDS_SETMASK (0x30U)
#define PVR_CLEAR_ATTACHMENT_PROGRAM_OFFSET_SHIFT (1U)
#define PVR_CLEAR_ATTACHMENT_PROGRAM_OFFSET_SETMASK (0x0EU)
#define PVR_CLEAR_ATTACHMENT_PROGRAM_DEST_SHIFT (0U)
#define PVR_CLEAR_ATTACHMENT_PROGRAM_DEST_SETMASK (0x01U)
/**
 * \brief Returns the index of the clear attachment USC program.
 *
 * For shaders which use output registers "dword_count" is essentially the
 * count of output registers to use, and "offset" is the first output reg to
 * use. E.g. dword_count 3, offset 1, will use o1, o2, o3.
 *
 * For shaders which use tile buffers as the destination "dword_count" is the
 * amount of dwords to write to the tile buffer and "offset" is the offset at
 * which to start writing.
 *
 * The index is packed as follows:
 *  - bit 0:     destination (uses_tile_buffer)
 *  - bits 1..3: offset
 *  - bits 4..5: dword_count - 1
 *
 * \param[in] dword_count      Number of dwords to clear, 1..4.
 * \param[in] offset           Clear offset, 0..7.
 * \param[in] uses_tile_buffer Whether the destination is a tile buffer rather
 *                             than output registers.
 * \return Index of the matching clear attachment program.
 */
static inline uint32_t
pvr_get_clear_attachment_program_index(uint32_t dword_count,
                                       uint32_t offset,
                                       bool uses_tile_buffer)
{
   /* dest        - Clear on chip or in memory.
    * offset      - Clear offset 0..7 .
    * dword_count - Clear from 1..4 dwords.
    */
   const uint32_t dest_start = 0;
   const uint32_t dest_end = 0;

   const uint32_t offset_start = 1;
   const uint32_t offset_end = 3;

   const uint32_t dword_count_start = 4;
   const uint32_t dword_count_end = 5;

   uint32_t idx = 0;

   /* dword_count is 1-based; it is stored 0-based so 1..4 fits in 2 bits. */
   dword_count -= 1;

   idx |= util_bitpack_uint(uses_tile_buffer, dest_start, dest_end);
   idx |= util_bitpack_uint(offset, offset_start, offset_end);
   idx |= util_bitpack_uint(dword_count, dword_count_start, dword_count_end);

   return idx;
}
#endif /* PVR_SHADER_FACTORY_H */