pvr: Add support for generating render pass hw setup data.

Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18707>
This commit is contained in:
Rajnesh Kanwal
2022-07-05 12:26:37 +01:00
parent b57cd62698
commit 10b6a0d567
9 changed files with 2812 additions and 154 deletions

View File

@@ -137,6 +137,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.has_num_clusters = true,
.has_num_raster_pipes = true,
.has_num_user_clip_planes = true,
.has_pbe2_in_xe = true,
.has_roguexe = true,
.has_screen_size8K = true,
.has_simple_internal_parameter_format = true,
@@ -216,6 +217,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.has_num_raster_pipes = true,
.has_num_user_clip_planes = true,
.has_paired_tiles = true,
.has_pbe2_in_xe = true,
.has_pds_ddmadt = true,
.has_roguexe = true,
.has_screen_size8K = true,

View File

@@ -267,6 +267,7 @@ struct pvr_device_features {
bool has_num_raster_pipes : 1;
bool has_num_user_clip_planes : 1;
bool has_paired_tiles : 1;
bool has_pbe2_in_xe : 1;
bool has_pds_ddmadt : 1;
bool has_robust_buffer_access : 1;
bool has_roguexe : 1;
@@ -277,6 +278,7 @@ struct pvr_device_features {
bool has_slc_cache_line_size_bits : 1;
bool has_slc_mcu_cache_controls : 1;
bool has_tf_bicubic_filter : 1;
bool has_tile_per_usc : 1;
bool has_tile_size_16x16 : 1;
bool has_tile_size_x : 1;
bool has_tile_size_y : 1;

View File

@@ -125,4 +125,6 @@
*/
#define ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES 7U
#define PVR_NUM_PBE_EMIT_REGS 8U
#endif /* ROGUE_HW_DEFS_H */

View File

@@ -216,12 +216,22 @@ rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info)
return 0U;
}
/* Returns the maximum number of GPU cores described by dev_info.
 *
 * When the device has both the gpu_multicore_support and xpu_max_slaves
 * features, the result is the xpu_max_slaves feature value plus one;
 * otherwise a single core is reported.
 */
static inline uint32_t
rogue_get_max_num_cores(const struct pvr_device_info *dev_info)
{
   /* No multicore support: exactly one core. */
   if (!PVR_HAS_FEATURE(dev_info, gpu_multicore_support))
      return 1U;

   /* Multicore capable, but no slave count is available to derive from. */
   if (!PVR_HAS_FEATURE(dev_info, xpu_max_slaves))
      return 1U;

   return PVR_GET_FEATURE_VALUE(dev_info, xpu_max_slaves, 0U) + 1U;
}
static inline uint32_t
rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info)
{
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
const uint32_t max_num_cores =
PVR_GET_FEATURE_VALUE(dev_info, xpu_max_slaves, 0U) + 1U;
const uint32_t max_num_cores = rogue_get_max_num_cores(dev_info);
const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
const uint32_t cdm_context_resume_buffer_stride =
ALIGN_POT(ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE, cache_line_size);

View File

@@ -575,7 +575,7 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
const struct pvr_render_pass *pass = render_pass_info->pass;
const struct pvr_renderpass_hwsetup_render *hw_render =
&pass->hw_setup->renders[idx];
ASSERTED const struct pvr_load_op *load_op = hw_render->client_data;
ASSERTED const struct pvr_load_op *load_op = hw_render->load_op;
const struct pvr_renderpass_colorinit *color_init =
&hw_render->color_init[0];
const struct pvr_render_pass_attachment *attachment =
@@ -618,7 +618,7 @@ static VkResult pvr_load_op_pds_data_create_and_upload(
const struct pvr_render_pass_info *render_pass_info =
&cmd_buffer->state.render_pass_info;
const struct pvr_load_op *load_op =
render_pass_info->pass->hw_setup->renders[idx].client_data;
render_pass_info->pass->hw_setup->renders[idx].load_op;
struct pvr_device *device = cmd_buffer->device;
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
struct pvr_pds_pixel_shader_sa_program program = { 0 };
@@ -979,7 +979,7 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
/* FIXME: Don't do this if there is a barrier load. */
if (render_pass_info->enable_bg_tag) {
const struct pvr_load_op *load_op = hw_render->client_data;
const struct pvr_load_op *load_op = hw_render->load_op;
struct pvr_pds_upload load_op_program;
/* FIXME: Should we free the PDS pixel event data or let it be freed

File diff suppressed because it is too large Load Diff

View File

@@ -31,55 +31,64 @@
struct pvr_device;
struct pvr_render_pass;
struct pvr_renderpass_hwsetup_subpass {
/* If >=0 then copy the depth into this pixel output for all fragment
* programs in the subpass.
*/
int32_t z_replicate;
/* The operation to perform on the depth at the start of the subpass. Loads
* are deferred to subpasses when depth has been replicated
*/
VkAttachmentLoadOp depth_initop;
/* If true then clear the stencil at the start of the subpass. */
bool stencil_clear;
/* Driver Id from the input pvr_render_subpass structure. */
uint32_t index;
/* For each color attachment to the subpass: the operation to perform at
* the start of the subpass.
*/
VkAttachmentLoadOp *color_initops;
struct pvr_load_op *load_op;
};
struct pvr_renderpass_colorinit {
/* Source surface for the operation. */
uint32_t index;
/* Type of operation: either clear or load. */
VkAttachmentLoadOp op;
};
/* FIXME: Adding these USC enums and structures here for now to avoid adding
* usc.h header. Needs to be moved to compiler specific header.
*/
/* Specifies the location of render target writes. */
enum usc_mrt_resource_type {
USC_MRT_RESOURCE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
USC_MRT_RESOURCE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid. */
USC_MRT_RESOURCE_TYPE_OUTPUT_REG,
USC_MRT_RESOURCE_TYPE_MEMORY,
};
/* Identifies the mechanism used for a resolve. Zero is reserved as an invalid
 * value so that zero-initialized storage is never mistaken for a real choice.
 *
 * NOTE(review): presumably _PBE means the resolve is done by the PBE and
 * _TRANSFER means it is done by a separate transfer -- confirm against the
 * users of this enum.
 */
enum pvr_resolve_type {
PVR_RESOLVE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid. */
PVR_RESOLVE_TYPE_PBE,
PVR_RESOLVE_TYPE_TRANSFER,
};
/* Describes how an input attachment is loaded. This is the type of the 'type'
 * field of pvr_renderpass_hwsetup_subpass::input_access.
 */
enum pvr_renderpass_hwsetup_input_access {
/* The attachment must be loaded using a texture sample. */
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_OFFCHIP,
/* The attachment can be loaded from an output register or tile buffer. */
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP,
/* As _ONCHIP but the attachment is the result of a Z replicate in the same
 * subpass.
 */
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE,
};
#define PVR_USC_RENDER_TARGET_MAXIMUM_SIZE_IN_DWORDS (4)
/* Input description of a single render target (see
 * usc_mrt_resource::mrt_desc).
 */
struct usc_mrt_desc {
/* Size (in bytes) of the intermediate storage required for each pixel in the
 * render target.
 */
uint32_t intermediate_size;
/* Number of bytes allocated for each component in the output registers (as
 * opposed to the pixel format).
 */
uint32_t component_alignment;
/* Mask of the bits from each dword which are read by the PBE. One mask per
 * dword, for up to PVR_USC_RENDER_TARGET_MAXIMUM_SIZE_IN_DWORDS dwords.
 */
uint32_t valid_mask[PVR_USC_RENDER_TARGET_MAXIMUM_SIZE_IN_DWORDS];
/* Higher number = higher priority. Used to decide which render targets get
 * allocated dedicated output registers.
 */
uint32_t priority;
};
struct usc_mrt_resource {
/* Input description of render target. */
struct usc_mrt_desc mrt_desc;
/* Resource type allocated for render target. */
enum usc_mrt_resource_type type;
/* Intermediate pixel size (in bytes). */
uint32_t intermediate_size;
union {
/* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER. */
/* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG. */
struct {
/* The output register to use. */
uint32_t output_reg;
@@ -90,7 +99,7 @@ struct usc_mrt_resource {
/* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
struct {
/* The number of the tile buffer to use. */
/* The index of the tile buffer to use. */
uint32_t tile_buffer;
/* The offset in dwords within the tile buffer. */
@@ -103,16 +112,25 @@ struct usc_mrt_setup {
/* Number of render targets present. */
uint32_t num_render_targets;
/* Number of output registers used per-pixel (1, 2 or 4). */
uint32_t num_output_regs;
/* Number of tile buffers used. */
uint32_t num_tile_buffers;
/* Size of a tile buffer in bytes. */
uint32_t tile_buffer_size;
/* Array of MRT resources allocated for each render target. The number of
* elements is determined by usc_mrt_setup::render_targets_count.
* elements is determined by usc_mrt_setup::num_render_targets.
*/
struct usc_mrt_resource *mrt_resources;
};
enum pvr_resolve_type {
PVR_RESOLVE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
PVR_RESOLVE_TYPE_PBE,
PVR_RESOLVE_TYPE_TRANSFER,
/* Don't set up source pos in emit. */
bool disable_source_pos_override;
/* Hash unique to this particular setup. */
uint32_t hash;
};
struct pvr_renderpass_hwsetup_eot_surface {
@@ -138,6 +156,51 @@ struct pvr_renderpass_hwsetup_eot_surface {
uint32_t src_attachment_idx;
};
/* Hardware setup data for one subpass scheduled in a HW render (see
 * pvr_renderpass_hwsetup_render::subpasses).
 */
struct pvr_renderpass_hwsetup_subpass {
/* Mapping from fragment stage pixel outputs to hardware storage for all
 * fragment programs in the subpass.
 */
struct usc_mrt_setup setup;
/* If >=0 then copy the depth into this pixel output for all fragment
 * programs in the subpass.
 */
int32_t z_replicate;
/* The operation to perform on the depth at the start of the subpass. Loads
 * are deferred to subpasses when depth has been replicated.
 */
VkAttachmentLoadOp depth_initop;
/* If true then clear the stencil at the start of the subpass. */
bool stencil_clear;
/* Subpass index from the input pvr_render_subpass structure. */
uint32_t index;
/* For each color attachment to the subpass: the operation to perform at
 * the start of the subpass.
 */
VkAttachmentLoadOp *color_initops;
/* NOTE(review): presumably the load op associated with this subpass; who
 * creates and destroys it is not visible here -- confirm.
 */
struct pvr_load_op *load_op;
/* How input attachments are accessed (see
 * enum pvr_renderpass_hwsetup_input_access).
 * NOTE(review): presumably one entry per input attachment of the subpass, and
 * on_chip_rt presumably identifies the on-chip render target for the _ONCHIP
 * access types -- confirm.
 */
struct {
enum pvr_renderpass_hwsetup_input_access type;
uint32_t on_chip_rt;
} * input_access;
/* NOTE(review): presumably a bitmask of the output registers used by this
 * subpass -- confirm.
 */
uint8_t output_register_mask;
};
/* Describes one initialization of a render target at the start of a HW render
 * (see pvr_renderpass_hwsetup_render::color_init).
 */
struct pvr_renderpass_colorinit {
/* Source attachment for the operation. */
uint32_t index;
/* Type of operation: either clear or load. */
VkAttachmentLoadOp op;
};
struct pvr_renderpass_hwsetup_render {
/* Number of pixel output registers to allocate for this render. */
uint32_t output_regs_count;
@@ -152,17 +215,17 @@ struct pvr_renderpass_hwsetup_render {
struct pvr_renderpass_hwsetup_subpass *subpasses;
/* The sample count of every color attachment (or depth attachment if
* z-only) in this render
* z-only) in this render.
*/
uint32_t sample_count;
/* Driver Id for the surface to use for depth/stencil load/store in this
/* Index of the attachment to use for depth/stencil load/store in this
* render.
*/
int32_t ds_attach_idx;
/* Operation on the on-chip depth at the start of the render.
* Either load from 'ds_surface_id', clear using 'ds_surface_id' or leave
* Either load from 'ds_attach_idx', clear using 'ds_attach_idx' or leave
* uninitialized.
*/
VkAttachmentLoadOp depth_init;
@@ -170,23 +233,33 @@ struct pvr_renderpass_hwsetup_render {
/* Operation on the on-chip stencil at the start of the render. */
VkAttachmentLoadOp stencil_init;
/* For each operation: the destination in the on-chip color storage. */
struct usc_mrt_setup init_setup;
/* Count of operations on on-chip color storage at the start of the render.
*/
uint32_t color_init_count;
/* For each operation: the destination in the on-chip color storage. */
struct usc_mrt_setup init_setup;
/* How to initialize render targets at the start of the render. */
struct pvr_renderpass_colorinit *color_init;
/* true to store depth to 'ds_attach_idx' at the end of the render. */
bool depth_store;
/* true to store stencil to 'ds_attach_idx' at the end of the render. */
bool stencil_store;
/* Describes the location of the source data for each stored surface. */
struct usc_mrt_setup eot_setup;
struct pvr_renderpass_hwsetup_eot_surface *eot_surfaces;
uint32_t eot_surface_count;
void *client_data;
uint32_t pbe_emits;
/* true if this HW render has lasting effects on its attachments. */
bool has_side_effects;
struct pvr_load_op *load_op;
};
struct pvr_renderpass_hw_map {
@@ -206,13 +279,18 @@ struct pvr_renderpass_hwsetup {
* that render where the subpass is scheduled.
*/
struct pvr_renderpass_hw_map *subpass_map;
bool *surface_allocate;
};
struct pvr_renderpass_hwsetup *
pvr_create_renderpass_hwsetup(struct pvr_device *device,
struct pvr_render_pass *pass,
bool disable_merge);
void pvr_destroy_renderpass_hwsetup(struct pvr_device *device,
VkResult pvr_create_renderpass_hwsetup(
struct pvr_device *device,
const VkAllocationCallbacks *alloc,
struct pvr_render_pass *pass,
bool disable_merge,
struct pvr_renderpass_hwsetup **const hw_setup_out);
void pvr_destroy_renderpass_hwsetup(const VkAllocationCallbacks *alloc,
struct pvr_renderpass_hwsetup *hw_setup);
#endif /* PVR_HW_PASS_H */

View File

@@ -32,7 +32,7 @@
#include "pvr_device_info.h"
#include "util/u_math.h"
#define PVR_MAX_COLOR_ATTACHMENTS 8U /* Number of PBE emit registers. */
#define PVR_MAX_COLOR_ATTACHMENTS PVR_NUM_PBE_EMIT_REGS
#define PVR_MAX_QUEUES 2U
#define PVR_MAX_VIEWPORTS 1U
#define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U

View File

@@ -499,11 +499,14 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
pass->max_tilebuffer_count =
PVR_SPM_LOAD_IN_BUFFERS_COUNT(&device->pdevice->dev_info);
pass->hw_setup = pvr_create_renderpass_hwsetup(device, pass, false);
if (!pass->hw_setup) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
result =
pvr_create_renderpass_hwsetup(device,
pAllocator ? pAllocator : &device->vk.alloc,
pass,
false,
&pass->hw_setup);
if (result != VK_SUCCESS)
goto err_free_pass;
}
pvr_init_subpass_userpass_spawn(pass->hw_setup, pass, pass->subpasses);
@@ -516,7 +519,7 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
pvr_finishme("Set up tile buffer table");
if (!hw_render->color_init_count) {
assert(!hw_render->client_data);
assert(!hw_render->load_op);
continue;
}
@@ -527,7 +530,7 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
if (result != VK_SUCCESS)
goto err_load_op_destroy;
hw_render->client_data = load_op;
hw_render->load_op = load_op;
}
*pRenderPass = pvr_render_pass_to_handle(pass);
@@ -539,11 +542,12 @@ err_load_op_destroy:
struct pvr_renderpass_hwsetup_render *hw_render =
&pass->hw_setup->renders[i];
if (hw_render->client_data)
pvr_load_op_destroy(device, pAllocator, hw_render->client_data);
if (hw_render->load_op)
pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
}
pvr_destroy_renderpass_hwsetup(device, pass->hw_setup);
pvr_destroy_renderpass_hwsetup(pAllocator ? pAllocator : &device->vk.alloc,
pass->hw_setup);
err_free_pass:
vk_object_base_finish(&pass->base);
@@ -566,10 +570,11 @@ void pvr_DestroyRenderPass(VkDevice _device,
struct pvr_renderpass_hwsetup_render *hw_render =
&pass->hw_setup->renders[i];
pvr_load_op_destroy(device, pAllocator, hw_render->client_data);
pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
}
pvr_destroy_renderpass_hwsetup(device, pass->hw_setup);
pvr_destroy_renderpass_hwsetup(pAllocator ? pAllocator : &device->vk.alloc,
pass->hw_setup);
vk_object_base_finish(&pass->base);
vk_free2(&device->vk.alloc, pAllocator, pass);
}