pvr: Add support for generating render pass hw setup data.
Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18707>
This commit is contained in:
@@ -137,6 +137,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
|
||||
.has_num_clusters = true,
|
||||
.has_num_raster_pipes = true,
|
||||
.has_num_user_clip_planes = true,
|
||||
.has_pbe2_in_xe = true,
|
||||
.has_roguexe = true,
|
||||
.has_screen_size8K = true,
|
||||
.has_simple_internal_parameter_format = true,
|
||||
@@ -216,6 +217,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
|
||||
.has_num_raster_pipes = true,
|
||||
.has_num_user_clip_planes = true,
|
||||
.has_paired_tiles = true,
|
||||
.has_pbe2_in_xe = true,
|
||||
.has_pds_ddmadt = true,
|
||||
.has_roguexe = true,
|
||||
.has_screen_size8K = true,
|
||||
|
@@ -267,6 +267,7 @@ struct pvr_device_features {
|
||||
bool has_num_raster_pipes : 1;
|
||||
bool has_num_user_clip_planes : 1;
|
||||
bool has_paired_tiles : 1;
|
||||
bool has_pbe2_in_xe : 1;
|
||||
bool has_pds_ddmadt : 1;
|
||||
bool has_robust_buffer_access : 1;
|
||||
bool has_roguexe : 1;
|
||||
@@ -277,6 +278,7 @@ struct pvr_device_features {
|
||||
bool has_slc_cache_line_size_bits : 1;
|
||||
bool has_slc_mcu_cache_controls : 1;
|
||||
bool has_tf_bicubic_filter : 1;
|
||||
bool has_tile_per_usc : 1;
|
||||
bool has_tile_size_16x16 : 1;
|
||||
bool has_tile_size_x : 1;
|
||||
bool has_tile_size_y : 1;
|
||||
|
@@ -125,4 +125,6 @@
|
||||
*/
|
||||
#define ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES 7U
|
||||
|
||||
#define PVR_NUM_PBE_EMIT_REGS 8U
|
||||
|
||||
#endif /* ROGUE_HW_DEFS_H */
|
||||
|
@@ -216,12 +216,22 @@ rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info)
|
||||
return 0U;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
rogue_get_max_num_cores(const struct pvr_device_info *dev_info)
|
||||
{
|
||||
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) &&
|
||||
PVR_HAS_FEATURE(dev_info, xpu_max_slaves)) {
|
||||
return PVR_GET_FEATURE_VALUE(dev_info, xpu_max_slaves, 0U) + 1U;
|
||||
}
|
||||
|
||||
return 1U;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info)
|
||||
{
|
||||
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
|
||||
const uint32_t max_num_cores =
|
||||
PVR_GET_FEATURE_VALUE(dev_info, xpu_max_slaves, 0U) + 1U;
|
||||
const uint32_t max_num_cores = rogue_get_max_num_cores(dev_info);
|
||||
const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
|
||||
const uint32_t cdm_context_resume_buffer_stride =
|
||||
ALIGN_POT(ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE, cache_line_size);
|
||||
|
@@ -575,7 +575,7 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
|
||||
const struct pvr_render_pass *pass = render_pass_info->pass;
|
||||
const struct pvr_renderpass_hwsetup_render *hw_render =
|
||||
&pass->hw_setup->renders[idx];
|
||||
ASSERTED const struct pvr_load_op *load_op = hw_render->client_data;
|
||||
ASSERTED const struct pvr_load_op *load_op = hw_render->load_op;
|
||||
const struct pvr_renderpass_colorinit *color_init =
|
||||
&hw_render->color_init[0];
|
||||
const struct pvr_render_pass_attachment *attachment =
|
||||
@@ -618,7 +618,7 @@ static VkResult pvr_load_op_pds_data_create_and_upload(
|
||||
const struct pvr_render_pass_info *render_pass_info =
|
||||
&cmd_buffer->state.render_pass_info;
|
||||
const struct pvr_load_op *load_op =
|
||||
render_pass_info->pass->hw_setup->renders[idx].client_data;
|
||||
render_pass_info->pass->hw_setup->renders[idx].load_op;
|
||||
struct pvr_device *device = cmd_buffer->device;
|
||||
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||
struct pvr_pds_pixel_shader_sa_program program = { 0 };
|
||||
@@ -979,7 +979,7 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
||||
|
||||
/* FIXME: Don't do this if there is a barrier load. */
|
||||
if (render_pass_info->enable_bg_tag) {
|
||||
const struct pvr_load_op *load_op = hw_render->client_data;
|
||||
const struct pvr_load_op *load_op = hw_render->load_op;
|
||||
struct pvr_pds_upload load_op_program;
|
||||
|
||||
/* FIXME: Should we free the PDS pixel event data or let it be freed
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -31,55 +31,64 @@
|
||||
struct pvr_device;
|
||||
struct pvr_render_pass;
|
||||
|
||||
struct pvr_renderpass_hwsetup_subpass {
|
||||
/* If >=0 then copy the depth into this pixel output for all fragment
|
||||
* programs in the subpass.
|
||||
*/
|
||||
int32_t z_replicate;
|
||||
|
||||
/* The operation to perform on the depth at the start of the subpass. Loads
|
||||
* are deferred to subpasses when depth has been replicated
|
||||
*/
|
||||
VkAttachmentLoadOp depth_initop;
|
||||
|
||||
/* If true then clear the stencil at the start of the subpass. */
|
||||
bool stencil_clear;
|
||||
|
||||
/* Driver Id from the input pvr_render_subpass structure. */
|
||||
uint32_t index;
|
||||
|
||||
/* For each color attachment to the subpass: the operation to perform at
|
||||
* the start of the subpass.
|
||||
*/
|
||||
VkAttachmentLoadOp *color_initops;
|
||||
|
||||
struct pvr_load_op *load_op;
|
||||
};
|
||||
|
||||
struct pvr_renderpass_colorinit {
|
||||
/* Source surface for the operation. */
|
||||
uint32_t index;
|
||||
|
||||
/* Type of operation: either clear or load. */
|
||||
VkAttachmentLoadOp op;
|
||||
};
|
||||
|
||||
/* FIXME: Adding these USC enums and structures here for now to avoid adding
|
||||
* usc.h header. Needs to be moved to compiler specific header.
|
||||
*/
|
||||
/* Specifies the location of render target writes. */
|
||||
enum usc_mrt_resource_type {
|
||||
USC_MRT_RESOURCE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
|
||||
USC_MRT_RESOURCE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid. */
|
||||
USC_MRT_RESOURCE_TYPE_OUTPUT_REG,
|
||||
USC_MRT_RESOURCE_TYPE_MEMORY,
|
||||
};
|
||||
|
||||
enum pvr_resolve_type {
|
||||
PVR_RESOLVE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid. */
|
||||
PVR_RESOLVE_TYPE_PBE,
|
||||
PVR_RESOLVE_TYPE_TRANSFER,
|
||||
};
|
||||
|
||||
enum pvr_renderpass_hwsetup_input_access {
|
||||
/* The attachment must be loaded using a texture sample. */
|
||||
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_OFFCHIP,
|
||||
/* The attachment can be loaded from an output register or tile buffer. */
|
||||
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP,
|
||||
/* As _ONCHIP but the attachment is the result of a Z replicate in the same
|
||||
* subpass.
|
||||
*/
|
||||
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE,
|
||||
};
|
||||
|
||||
#define PVR_USC_RENDER_TARGET_MAXIMUM_SIZE_IN_DWORDS (4)
|
||||
|
||||
struct usc_mrt_desc {
|
||||
/* Size (in bytes) of the intermediate storage required for each pixel in the
|
||||
* render target.
|
||||
*/
|
||||
uint32_t intermediate_size;
|
||||
|
||||
/* Number of bytes allocated for each component in the output registers (as
|
||||
* opposed to the pixel format).
|
||||
*/
|
||||
uint32_t component_alignment;
|
||||
|
||||
/* Mask of the bits from each dword which are read by the PBE. */
|
||||
uint32_t valid_mask[PVR_USC_RENDER_TARGET_MAXIMUM_SIZE_IN_DWORDS];
|
||||
|
||||
/* Higher number = higher priority. Used to decide which render targets get
|
||||
* allocated dedicated output registers.
|
||||
*/
|
||||
uint32_t priority;
|
||||
};
|
||||
|
||||
struct usc_mrt_resource {
|
||||
/* Input description of render target. */
|
||||
struct usc_mrt_desc mrt_desc;
|
||||
|
||||
/* Resource type allocated for render target. */
|
||||
enum usc_mrt_resource_type type;
|
||||
|
||||
/* Intermediate pixel size (in bytes). */
|
||||
uint32_t intermediate_size;
|
||||
|
||||
union {
|
||||
/* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER. */
|
||||
/* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG. */
|
||||
struct {
|
||||
/* The output register to use. */
|
||||
uint32_t output_reg;
|
||||
@@ -90,7 +99,7 @@ struct usc_mrt_resource {
|
||||
|
||||
/* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
|
||||
struct {
|
||||
/* The number of the tile buffer to use. */
|
||||
/* The index of the tile buffer to use. */
|
||||
uint32_t tile_buffer;
|
||||
|
||||
/* The offset in dwords within the tile buffer. */
|
||||
@@ -103,16 +112,25 @@ struct usc_mrt_setup {
|
||||
/* Number of render targets present. */
|
||||
uint32_t num_render_targets;
|
||||
|
||||
/* Number of output registers used per-pixel (1, 2 or 4). */
|
||||
uint32_t num_output_regs;
|
||||
|
||||
/* Number of tile buffers used. */
|
||||
uint32_t num_tile_buffers;
|
||||
|
||||
/* Size of a tile buffer in bytes. */
|
||||
uint32_t tile_buffer_size;
|
||||
|
||||
/* Array of MRT resources allocated for each render target. The number of
|
||||
* elements is determined by usc_mrt_setup::render_targets_count.
|
||||
* elements is determined by usc_mrt_setup::num_render_targets.
|
||||
*/
|
||||
struct usc_mrt_resource *mrt_resources;
|
||||
};
|
||||
|
||||
enum pvr_resolve_type {
|
||||
PVR_RESOLVE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
|
||||
PVR_RESOLVE_TYPE_PBE,
|
||||
PVR_RESOLVE_TYPE_TRANSFER,
|
||||
/* Don't set up source pos in emit. */
|
||||
bool disable_source_pos_override;
|
||||
|
||||
/* Hash unique to this particular setup. */
|
||||
uint32_t hash;
|
||||
};
|
||||
|
||||
struct pvr_renderpass_hwsetup_eot_surface {
|
||||
@@ -138,6 +156,51 @@ struct pvr_renderpass_hwsetup_eot_surface {
|
||||
uint32_t src_attachment_idx;
|
||||
};
|
||||
|
||||
struct pvr_renderpass_hwsetup_subpass {
|
||||
/* Mapping from fragment stage pixel outputs to hardware storage for all
|
||||
* fragment programs in the subpass.
|
||||
*/
|
||||
struct usc_mrt_setup setup;
|
||||
|
||||
/* If >=0 then copy the depth into this pixel output for all fragment
|
||||
* programs in the subpass.
|
||||
*/
|
||||
int32_t z_replicate;
|
||||
|
||||
/* The operation to perform on the depth at the start of the subpass. Loads
|
||||
* are deferred to subpasses when depth has been replicated.
|
||||
*/
|
||||
VkAttachmentLoadOp depth_initop;
|
||||
|
||||
/* If true then clear the stencil at the start of the subpass. */
|
||||
bool stencil_clear;
|
||||
|
||||
/* Subpass index from the input pvr_render_subpass structure. */
|
||||
uint32_t index;
|
||||
|
||||
/* For each color attachment to the subpass the operation to perform at
|
||||
* the start of the subpass.
|
||||
*/
|
||||
VkAttachmentLoadOp *color_initops;
|
||||
|
||||
struct pvr_load_op *load_op;
|
||||
|
||||
struct {
|
||||
enum pvr_renderpass_hwsetup_input_access type;
|
||||
uint32_t on_chip_rt;
|
||||
} * input_access;
|
||||
|
||||
uint8_t output_register_mask;
|
||||
};
|
||||
|
||||
struct pvr_renderpass_colorinit {
|
||||
/* Source attachment for the operation. */
|
||||
uint32_t index;
|
||||
|
||||
/* Type of operation either clear or load. */
|
||||
VkAttachmentLoadOp op;
|
||||
};
|
||||
|
||||
struct pvr_renderpass_hwsetup_render {
|
||||
/* Number of pixel output registers to allocate for this render. */
|
||||
uint32_t output_regs_count;
|
||||
@@ -152,17 +215,17 @@ struct pvr_renderpass_hwsetup_render {
|
||||
struct pvr_renderpass_hwsetup_subpass *subpasses;
|
||||
|
||||
/* The sample count of every color attachment (or depth attachment if
|
||||
* z-only) in this render
|
||||
* z-only) in this render.
|
||||
*/
|
||||
uint32_t sample_count;
|
||||
|
||||
/* Driver Id for the surface to use for depth/stencil load/store in this
|
||||
/* Index of the attachment to use for depth/stencil load/store in this
|
||||
* render.
|
||||
*/
|
||||
int32_t ds_attach_idx;
|
||||
|
||||
/* Operation on the on-chip depth at the start of the render.
|
||||
* Either load from 'ds_surface_id', clear using 'ds_surface_id' or leave
|
||||
* Either load from 'ds_attach_idx', clear using 'ds_attach_idx' or leave
|
||||
* uninitialized.
|
||||
*/
|
||||
VkAttachmentLoadOp depth_init;
|
||||
@@ -170,23 +233,33 @@ struct pvr_renderpass_hwsetup_render {
|
||||
/* Operation on the on-chip stencil at the start of the render. */
|
||||
VkAttachmentLoadOp stencil_init;
|
||||
|
||||
/* For each operation: the destination in the on-chip color storage. */
|
||||
struct usc_mrt_setup init_setup;
|
||||
|
||||
/* Count of operations on on-chip color storage at the start of the render.
|
||||
*/
|
||||
uint32_t color_init_count;
|
||||
|
||||
/* For each operation: the destination in the on-chip color storage. */
|
||||
struct usc_mrt_setup init_setup;
|
||||
|
||||
/* How to initialize render targets at the start of the render. */
|
||||
struct pvr_renderpass_colorinit *color_init;
|
||||
|
||||
/* true to store depth to 'ds_attach_idx' at the end of the render. */
|
||||
bool depth_store;
|
||||
/* true to store stencil to 'ds_attach_idx' at the end of the render. */
|
||||
bool stencil_store;
|
||||
|
||||
/* Describes the location of the source data for each stored surface. */
|
||||
struct usc_mrt_setup eot_setup;
|
||||
|
||||
struct pvr_renderpass_hwsetup_eot_surface *eot_surfaces;
|
||||
uint32_t eot_surface_count;
|
||||
|
||||
void *client_data;
|
||||
uint32_t pbe_emits;
|
||||
|
||||
/* true if this HW render has lasting effects on its attachments. */
|
||||
bool has_side_effects;
|
||||
|
||||
struct pvr_load_op *load_op;
|
||||
};
|
||||
|
||||
struct pvr_renderpass_hw_map {
|
||||
@@ -206,13 +279,18 @@ struct pvr_renderpass_hwsetup {
|
||||
* that render where the subpass is scheduled.
|
||||
*/
|
||||
struct pvr_renderpass_hw_map *subpass_map;
|
||||
|
||||
bool *surface_allocate;
|
||||
};
|
||||
|
||||
struct pvr_renderpass_hwsetup *
|
||||
pvr_create_renderpass_hwsetup(struct pvr_device *device,
|
||||
struct pvr_render_pass *pass,
|
||||
bool disable_merge);
|
||||
void pvr_destroy_renderpass_hwsetup(struct pvr_device *device,
|
||||
VkResult pvr_create_renderpass_hwsetup(
|
||||
struct pvr_device *device,
|
||||
const VkAllocationCallbacks *alloc,
|
||||
struct pvr_render_pass *pass,
|
||||
bool disable_merge,
|
||||
struct pvr_renderpass_hwsetup **const hw_setup_out);
|
||||
|
||||
void pvr_destroy_renderpass_hwsetup(const VkAllocationCallbacks *alloc,
|
||||
struct pvr_renderpass_hwsetup *hw_setup);
|
||||
|
||||
#endif /* PVR_HW_PASS_H */
|
||||
|
@@ -32,7 +32,7 @@
|
||||
#include "pvr_device_info.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
#define PVR_MAX_COLOR_ATTACHMENTS 8U /* Number of PBE emit registers. */
|
||||
#define PVR_MAX_COLOR_ATTACHMENTS PVR_NUM_PBE_EMIT_REGS
|
||||
#define PVR_MAX_QUEUES 2U
|
||||
#define PVR_MAX_VIEWPORTS 1U
|
||||
#define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U
|
||||
|
@@ -499,11 +499,14 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
|
||||
pass->max_tilebuffer_count =
|
||||
PVR_SPM_LOAD_IN_BUFFERS_COUNT(&device->pdevice->dev_info);
|
||||
|
||||
pass->hw_setup = pvr_create_renderpass_hwsetup(device, pass, false);
|
||||
if (!pass->hw_setup) {
|
||||
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
result =
|
||||
pvr_create_renderpass_hwsetup(device,
|
||||
pAllocator ? pAllocator : &device->vk.alloc,
|
||||
pass,
|
||||
false,
|
||||
&pass->hw_setup);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_pass;
|
||||
}
|
||||
|
||||
pvr_init_subpass_userpass_spawn(pass->hw_setup, pass, pass->subpasses);
|
||||
|
||||
@@ -516,7 +519,7 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
|
||||
pvr_finishme("Set up tile buffer table");
|
||||
|
||||
if (!hw_render->color_init_count) {
|
||||
assert(!hw_render->client_data);
|
||||
assert(!hw_render->load_op);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -527,7 +530,7 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_load_op_destroy;
|
||||
|
||||
hw_render->client_data = load_op;
|
||||
hw_render->load_op = load_op;
|
||||
}
|
||||
|
||||
*pRenderPass = pvr_render_pass_to_handle(pass);
|
||||
@@ -539,11 +542,12 @@ err_load_op_destroy:
|
||||
struct pvr_renderpass_hwsetup_render *hw_render =
|
||||
&pass->hw_setup->renders[i];
|
||||
|
||||
if (hw_render->client_data)
|
||||
pvr_load_op_destroy(device, pAllocator, hw_render->client_data);
|
||||
if (hw_render->load_op)
|
||||
pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
|
||||
}
|
||||
|
||||
pvr_destroy_renderpass_hwsetup(device, pass->hw_setup);
|
||||
pvr_destroy_renderpass_hwsetup(pAllocator ? pAllocator : &device->vk.alloc,
|
||||
pass->hw_setup);
|
||||
|
||||
err_free_pass:
|
||||
vk_object_base_finish(&pass->base);
|
||||
@@ -566,10 +570,11 @@ void pvr_DestroyRenderPass(VkDevice _device,
|
||||
struct pvr_renderpass_hwsetup_render *hw_render =
|
||||
&pass->hw_setup->renders[i];
|
||||
|
||||
pvr_load_op_destroy(device, pAllocator, hw_render->client_data);
|
||||
pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
|
||||
}
|
||||
|
||||
pvr_destroy_renderpass_hwsetup(device, pass->hw_setup);
|
||||
pvr_destroy_renderpass_hwsetup(pAllocator ? pAllocator : &device->vk.alloc,
|
||||
pass->hw_setup);
|
||||
vk_object_base_finish(&pass->base);
|
||||
vk_free2(&device->vk.alloc, pAllocator, pass);
|
||||
}
|
||||
|
Reference in New Issue
Block a user