pvr: Add support for generating render pass hw setup data.

Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18707>
This commit is contained in:
Rajnesh Kanwal
2022-07-05 12:26:37 +01:00
parent b57cd62698
commit 10b6a0d567
9 changed files with 2812 additions and 154 deletions

View File

@@ -137,6 +137,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.has_num_clusters = true, .has_num_clusters = true,
.has_num_raster_pipes = true, .has_num_raster_pipes = true,
.has_num_user_clip_planes = true, .has_num_user_clip_planes = true,
.has_pbe2_in_xe = true,
.has_roguexe = true, .has_roguexe = true,
.has_screen_size8K = true, .has_screen_size8K = true,
.has_simple_internal_parameter_format = true, .has_simple_internal_parameter_format = true,
@@ -216,6 +217,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.has_num_raster_pipes = true, .has_num_raster_pipes = true,
.has_num_user_clip_planes = true, .has_num_user_clip_planes = true,
.has_paired_tiles = true, .has_paired_tiles = true,
.has_pbe2_in_xe = true,
.has_pds_ddmadt = true, .has_pds_ddmadt = true,
.has_roguexe = true, .has_roguexe = true,
.has_screen_size8K = true, .has_screen_size8K = true,

View File

@@ -267,6 +267,7 @@ struct pvr_device_features {
bool has_num_raster_pipes : 1; bool has_num_raster_pipes : 1;
bool has_num_user_clip_planes : 1; bool has_num_user_clip_planes : 1;
bool has_paired_tiles : 1; bool has_paired_tiles : 1;
bool has_pbe2_in_xe : 1;
bool has_pds_ddmadt : 1; bool has_pds_ddmadt : 1;
bool has_robust_buffer_access : 1; bool has_robust_buffer_access : 1;
bool has_roguexe : 1; bool has_roguexe : 1;
@@ -277,6 +278,7 @@ struct pvr_device_features {
bool has_slc_cache_line_size_bits : 1; bool has_slc_cache_line_size_bits : 1;
bool has_slc_mcu_cache_controls : 1; bool has_slc_mcu_cache_controls : 1;
bool has_tf_bicubic_filter : 1; bool has_tf_bicubic_filter : 1;
bool has_tile_per_usc : 1;
bool has_tile_size_16x16 : 1; bool has_tile_size_16x16 : 1;
bool has_tile_size_x : 1; bool has_tile_size_x : 1;
bool has_tile_size_y : 1; bool has_tile_size_y : 1;

View File

@@ -125,4 +125,6 @@
*/ */
#define ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES 7U #define ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES 7U
#define PVR_NUM_PBE_EMIT_REGS 8U
#endif /* ROGUE_HW_DEFS_H */ #endif /* ROGUE_HW_DEFS_H */

View File

@@ -216,12 +216,22 @@ rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info)
return 0U; return 0U;
} }
/* Returns the maximum number of cores for the device.
 *
 * When the device reports both gpu_multicore_support and xpu_max_slaves the
 * result is the xpu_max_slaves feature value plus one; in every other case
 * the result is 1.
 */
static inline uint32_t
rogue_get_max_num_cores(const struct pvr_device_info *dev_info)
{
   /* No multicore support: a single core. */
   if (!PVR_HAS_FEATURE(dev_info, gpu_multicore_support))
      return 1U;

   /* Multicore support without a slave count: fall back to a single core. */
   if (!PVR_HAS_FEATURE(dev_info, xpu_max_slaves))
      return 1U;

   return PVR_GET_FEATURE_VALUE(dev_info, xpu_max_slaves, 0U) + 1U;
}
static inline uint32_t static inline uint32_t
rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info) rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info)
{ {
if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
const uint32_t max_num_cores = const uint32_t max_num_cores = rogue_get_max_num_cores(dev_info);
PVR_GET_FEATURE_VALUE(dev_info, xpu_max_slaves, 0U) + 1U;
const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
const uint32_t cdm_context_resume_buffer_stride = const uint32_t cdm_context_resume_buffer_stride =
ALIGN_POT(ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE, cache_line_size); ALIGN_POT(ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE, cache_line_size);

View File

@@ -575,7 +575,7 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
const struct pvr_render_pass *pass = render_pass_info->pass; const struct pvr_render_pass *pass = render_pass_info->pass;
const struct pvr_renderpass_hwsetup_render *hw_render = const struct pvr_renderpass_hwsetup_render *hw_render =
&pass->hw_setup->renders[idx]; &pass->hw_setup->renders[idx];
ASSERTED const struct pvr_load_op *load_op = hw_render->client_data; ASSERTED const struct pvr_load_op *load_op = hw_render->load_op;
const struct pvr_renderpass_colorinit *color_init = const struct pvr_renderpass_colorinit *color_init =
&hw_render->color_init[0]; &hw_render->color_init[0];
const struct pvr_render_pass_attachment *attachment = const struct pvr_render_pass_attachment *attachment =
@@ -618,7 +618,7 @@ static VkResult pvr_load_op_pds_data_create_and_upload(
const struct pvr_render_pass_info *render_pass_info = const struct pvr_render_pass_info *render_pass_info =
&cmd_buffer->state.render_pass_info; &cmd_buffer->state.render_pass_info;
const struct pvr_load_op *load_op = const struct pvr_load_op *load_op =
render_pass_info->pass->hw_setup->renders[idx].client_data; render_pass_info->pass->hw_setup->renders[idx].load_op;
struct pvr_device *device = cmd_buffer->device; struct pvr_device *device = cmd_buffer->device;
const struct pvr_device_info *dev_info = &device->pdevice->dev_info; const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
struct pvr_pds_pixel_shader_sa_program program = { 0 }; struct pvr_pds_pixel_shader_sa_program program = { 0 };
@@ -979,7 +979,7 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
/* FIXME: Don't do this if there is a barrier load. */ /* FIXME: Don't do this if there is a barrier load. */
if (render_pass_info->enable_bg_tag) { if (render_pass_info->enable_bg_tag) {
const struct pvr_load_op *load_op = hw_render->client_data; const struct pvr_load_op *load_op = hw_render->load_op;
struct pvr_pds_upload load_op_program; struct pvr_pds_upload load_op_program;
/* FIXME: Should we free the PDS pixel event data or let it be freed /* FIXME: Should we free the PDS pixel event data or let it be freed

File diff suppressed because it is too large Load Diff

View File

@@ -31,55 +31,64 @@
struct pvr_device; struct pvr_device;
struct pvr_render_pass; struct pvr_render_pass;
/* Per-subpass hardware setup state. */
struct pvr_renderpass_hwsetup_subpass {
/* If >=0 then copy the depth into this pixel output for all fragment
* programs in the subpass.
*/
int32_t z_replicate;
/* The operation to perform on the depth at the start of the subpass. Loads
* are deferred to subpasses when depth has been replicated.
*/
VkAttachmentLoadOp depth_initop;
/* If true then clear the stencil at the start of the subpass. */
bool stencil_clear;
/* Driver Id from the input pvr_render_subpass structure. */
uint32_t index;
/* For each color attachment to the subpass: the operation to perform at
* the start of the subpass.
*/
VkAttachmentLoadOp *color_initops;
/* NOTE(review): presumably the load operation associated with this subpass;
* ownership and lifetime are not visible here - confirm.
*/
struct pvr_load_op *load_op;
};
/* Describes a single color initialization operation: which surface it uses
* and whether it is a clear or a load.
*/
struct pvr_renderpass_colorinit {
/* Source surface for the operation. */
uint32_t index;
/* Type of operation: either clear or load. */
VkAttachmentLoadOp op;
};
/* FIXME: Adding these USC enums and structures here for now to avoid adding
* usc.h header. Needs to be moved to compiler specific header.
*/
/* Specifies the location of render target writes. */ /* Specifies the location of render target writes. */
enum usc_mrt_resource_type { enum usc_mrt_resource_type {
USC_MRT_RESOURCE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */ USC_MRT_RESOURCE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid. */
USC_MRT_RESOURCE_TYPE_OUTPUT_REG, USC_MRT_RESOURCE_TYPE_OUTPUT_REG,
USC_MRT_RESOURCE_TYPE_MEMORY, USC_MRT_RESOURCE_TYPE_MEMORY,
}; };
/* Selects how a resolve is performed.
* NOTE(review): PBE and TRANSFER presumably name the hardware path used for
* the resolve - confirm against the users of this enum.
*/
enum pvr_resolve_type {
PVR_RESOLVE_TYPE_INVALID = 0, /* Explicitly treat 0 as invalid. */
PVR_RESOLVE_TYPE_PBE,
PVR_RESOLVE_TYPE_TRANSFER,
};
/* Describes how an attachment used as an input is read: via a texture sample
* (off-chip) or directly from on-chip storage.
*/
enum pvr_renderpass_hwsetup_input_access {
/* The attachment must be loaded using a texture sample. */
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_OFFCHIP,
/* The attachment can be loaded from an output register or tile buffer. */
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP,
/* As _ONCHIP but the attachment is the result of a Z replicate in the same
* subpass.
*/
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE,
};
/* Number of dwords covered by usc_mrt_desc::valid_mask, i.e. the largest
* render target size (in dwords) that can be described.
*/
#define PVR_USC_RENDER_TARGET_MAXIMUM_SIZE_IN_DWORDS (4)
/* Input description of a render target. */
struct usc_mrt_desc {
/* Size (in bytes) of the intermediate storage required for each pixel in the
* render target.
*/
uint32_t intermediate_size;
/* Number of bytes allocated for each component in the output registers (as
* opposed to the pixel format).
*/
uint32_t component_alignment;
/* Mask of the bits from each dword which are read by the PBE. One entry per
* dword of the render target.
*/
uint32_t valid_mask[PVR_USC_RENDER_TARGET_MAXIMUM_SIZE_IN_DWORDS];
/* Higher number = higher priority. Used to decide which render targets get
* allocated dedicated output registers.
*/
uint32_t priority;
};
struct usc_mrt_resource { struct usc_mrt_resource {
/* Input description of render target. */
struct usc_mrt_desc mrt_desc;
/* Resource type allocated for render target. */ /* Resource type allocated for render target. */
enum usc_mrt_resource_type type; enum usc_mrt_resource_type type;
/* Intermediate pixel size (in bytes). */
uint32_t intermediate_size;
union { union {
/* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER. */ /* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG. */
struct { struct {
/* The output register to use. */ /* The output register to use. */
uint32_t output_reg; uint32_t output_reg;
@@ -90,7 +99,7 @@ struct usc_mrt_resource {
/* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */ /* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
struct { struct {
/* The number of the tile buffer to use. */ /* The index of the tile buffer to use. */
uint32_t tile_buffer; uint32_t tile_buffer;
/* The offset in dwords within the tile buffer. */ /* The offset in dwords within the tile buffer. */
@@ -103,16 +112,25 @@ struct usc_mrt_setup {
/* Number of render targets present. */ /* Number of render targets present. */
uint32_t num_render_targets; uint32_t num_render_targets;
/* Number of output registers used per-pixel (1, 2 or 4). */
uint32_t num_output_regs;
/* Number of tile buffers used. */
uint32_t num_tile_buffers;
/* Size of a tile buffer in bytes. */
uint32_t tile_buffer_size;
/* Array of MRT resources allocated for each render target. The number of /* Array of MRT resources allocated for each render target. The number of
* elements is determined by usc_mrt_setup::render_targets_count. * elements is determined by usc_mrt_setup::num_render_targets.
*/ */
struct usc_mrt_resource *mrt_resources; struct usc_mrt_resource *mrt_resources;
};
enum pvr_resolve_type { /* Don't set up source pos in emit. */
PVR_RESOLVE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */ bool disable_source_pos_override;
PVR_RESOLVE_TYPE_PBE,
PVR_RESOLVE_TYPE_TRANSFER, /* Hash unique to this particular setup. */
uint32_t hash;
}; };
struct pvr_renderpass_hwsetup_eot_surface { struct pvr_renderpass_hwsetup_eot_surface {
@@ -138,6 +156,51 @@ struct pvr_renderpass_hwsetup_eot_surface {
uint32_t src_attachment_idx; uint32_t src_attachment_idx;
}; };
/* Per-subpass hardware setup state. */
struct pvr_renderpass_hwsetup_subpass {
/* Mapping from fragment stage pixel outputs to hardware storage for all
* fragment programs in the subpass.
*/
struct usc_mrt_setup setup;
/* If >=0 then copy the depth into this pixel output for all fragment
* programs in the subpass.
*/
int32_t z_replicate;
/* The operation to perform on the depth at the start of the subpass. Loads
* are deferred to subpasses when depth has been replicated.
*/
VkAttachmentLoadOp depth_initop;
/* If true then clear the stencil at the start of the subpass. */
bool stencil_clear;
/* Subpass index from the input pvr_render_subpass structure. */
uint32_t index;
/* For each color attachment to the subpass: the operation to perform at
* the start of the subpass.
*/
VkAttachmentLoadOp *color_initops;
/* NOTE(review): presumably the load operation associated with this subpass;
* ownership and lifetime are not visible here - confirm.
*/
struct pvr_load_op *load_op;
/* NOTE(review): presumably one entry per input attachment of the subpass,
* giving the access type and, for on-chip access, the on-chip render target
* that holds the data - confirm element count and meaning of on_chip_rt.
*/
struct {
enum pvr_renderpass_hwsetup_input_access type;
uint32_t on_chip_rt;
} * input_access;
/* NOTE(review): presumably a bitmask of the output registers used by this
* subpass - confirm bit meaning.
*/
uint8_t output_register_mask;
};
/* Describes a single color initialization operation: which attachment it
* uses and whether it is a clear or a load.
*/
struct pvr_renderpass_colorinit {
/* Source attachment for the operation. */
uint32_t index;
/* Type of operation: either clear or load. */
VkAttachmentLoadOp op;
};
struct pvr_renderpass_hwsetup_render { struct pvr_renderpass_hwsetup_render {
/* Number of pixel output registers to allocate for this render. */ /* Number of pixel output registers to allocate for this render. */
uint32_t output_regs_count; uint32_t output_regs_count;
@@ -152,17 +215,17 @@ struct pvr_renderpass_hwsetup_render {
struct pvr_renderpass_hwsetup_subpass *subpasses; struct pvr_renderpass_hwsetup_subpass *subpasses;
/* The sample count of every color attachment (or depth attachment if /* The sample count of every color attachment (or depth attachment if
* z-only) in this render * z-only) in this render.
*/ */
uint32_t sample_count; uint32_t sample_count;
/* Driver Id for the surface to use for depth/stencil load/store in this /* Index of the attachment to use for depth/stencil load/store in this
* render. * render.
*/ */
int32_t ds_attach_idx; int32_t ds_attach_idx;
/* Operation on the on-chip depth at the start of the render. /* Operation on the on-chip depth at the start of the render.
* Either load from 'ds_surface_id', clear using 'ds_surface_id' or leave * Either load from 'ds_attach_idx', clear using 'ds_attach_idx' or leave
* uninitialized. * uninitialized.
*/ */
VkAttachmentLoadOp depth_init; VkAttachmentLoadOp depth_init;
@@ -170,23 +233,33 @@ struct pvr_renderpass_hwsetup_render {
/* Operation on the on-chip stencil at the start of the render. */ /* Operation on the on-chip stencil at the start of the render. */
VkAttachmentLoadOp stencil_init; VkAttachmentLoadOp stencil_init;
/* For each operation: the destination in the on-chip color storage. */
struct usc_mrt_setup init_setup;
/* Count of operations on on-chip color storage at the start of the render. /* Count of operations on on-chip color storage at the start of the render.
*/ */
uint32_t color_init_count; uint32_t color_init_count;
/* For each operation: the destination in the on-chip color storage. */
struct usc_mrt_setup init_setup;
/* How to initialize render targets at the start of the render. */ /* How to initialize render targets at the start of the render. */
struct pvr_renderpass_colorinit *color_init; struct pvr_renderpass_colorinit *color_init;
/* true to store depth to 'ds_attach_idx' at the end of the render. */
bool depth_store;
/* true to store stencil to 'ds_attach_idx' at the end of the render. */
bool stencil_store;
/* Describes the location of the source data for each stored surface. */ /* Describes the location of the source data for each stored surface. */
struct usc_mrt_setup eot_setup; struct usc_mrt_setup eot_setup;
struct pvr_renderpass_hwsetup_eot_surface *eot_surfaces; struct pvr_renderpass_hwsetup_eot_surface *eot_surfaces;
uint32_t eot_surface_count; uint32_t eot_surface_count;
void *client_data; uint32_t pbe_emits;
/* true if this HW render has lasting effects on its attachments. */
bool has_side_effects;
struct pvr_load_op *load_op;
}; };
struct pvr_renderpass_hw_map { struct pvr_renderpass_hw_map {
@@ -206,13 +279,18 @@ struct pvr_renderpass_hwsetup {
* that render where the subpass is scheduled. * that render where the subpass is scheduled.
*/ */
struct pvr_renderpass_hw_map *subpass_map; struct pvr_renderpass_hw_map *subpass_map;
bool *surface_allocate;
}; };
struct pvr_renderpass_hwsetup * VkResult pvr_create_renderpass_hwsetup(
pvr_create_renderpass_hwsetup(struct pvr_device *device, struct pvr_device *device,
struct pvr_render_pass *pass, const VkAllocationCallbacks *alloc,
bool disable_merge); struct pvr_render_pass *pass,
void pvr_destroy_renderpass_hwsetup(struct pvr_device *device, bool disable_merge,
struct pvr_renderpass_hwsetup **const hw_setup_out);
void pvr_destroy_renderpass_hwsetup(const VkAllocationCallbacks *alloc,
struct pvr_renderpass_hwsetup *hw_setup); struct pvr_renderpass_hwsetup *hw_setup);
#endif /* PVR_HW_PASS_H */ #endif /* PVR_HW_PASS_H */

View File

@@ -32,7 +32,7 @@
#include "pvr_device_info.h" #include "pvr_device_info.h"
#include "util/u_math.h" #include "util/u_math.h"
#define PVR_MAX_COLOR_ATTACHMENTS 8U /* Number of PBE emit registers. */ #define PVR_MAX_COLOR_ATTACHMENTS PVR_NUM_PBE_EMIT_REGS
#define PVR_MAX_QUEUES 2U #define PVR_MAX_QUEUES 2U
#define PVR_MAX_VIEWPORTS 1U #define PVR_MAX_VIEWPORTS 1U
#define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U #define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U

View File

@@ -499,11 +499,14 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
pass->max_tilebuffer_count = pass->max_tilebuffer_count =
PVR_SPM_LOAD_IN_BUFFERS_COUNT(&device->pdevice->dev_info); PVR_SPM_LOAD_IN_BUFFERS_COUNT(&device->pdevice->dev_info);
pass->hw_setup = pvr_create_renderpass_hwsetup(device, pass, false); result =
if (!pass->hw_setup) { pvr_create_renderpass_hwsetup(device,
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); pAllocator ? pAllocator : &device->vk.alloc,
pass,
false,
&pass->hw_setup);
if (result != VK_SUCCESS)
goto err_free_pass; goto err_free_pass;
}
pvr_init_subpass_userpass_spawn(pass->hw_setup, pass, pass->subpasses); pvr_init_subpass_userpass_spawn(pass->hw_setup, pass, pass->subpasses);
@@ -516,7 +519,7 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
pvr_finishme("Set up tile buffer table"); pvr_finishme("Set up tile buffer table");
if (!hw_render->color_init_count) { if (!hw_render->color_init_count) {
assert(!hw_render->client_data); assert(!hw_render->load_op);
continue; continue;
} }
@@ -527,7 +530,7 @@ VkResult pvr_CreateRenderPass2(VkDevice _device,
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto err_load_op_destroy; goto err_load_op_destroy;
hw_render->client_data = load_op; hw_render->load_op = load_op;
} }
*pRenderPass = pvr_render_pass_to_handle(pass); *pRenderPass = pvr_render_pass_to_handle(pass);
@@ -539,11 +542,12 @@ err_load_op_destroy:
struct pvr_renderpass_hwsetup_render *hw_render = struct pvr_renderpass_hwsetup_render *hw_render =
&pass->hw_setup->renders[i]; &pass->hw_setup->renders[i];
if (hw_render->client_data) if (hw_render->load_op)
pvr_load_op_destroy(device, pAllocator, hw_render->client_data); pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
} }
pvr_destroy_renderpass_hwsetup(device, pass->hw_setup); pvr_destroy_renderpass_hwsetup(pAllocator ? pAllocator : &device->vk.alloc,
pass->hw_setup);
err_free_pass: err_free_pass:
vk_object_base_finish(&pass->base); vk_object_base_finish(&pass->base);
@@ -566,10 +570,11 @@ void pvr_DestroyRenderPass(VkDevice _device,
struct pvr_renderpass_hwsetup_render *hw_render = struct pvr_renderpass_hwsetup_render *hw_render =
&pass->hw_setup->renders[i]; &pass->hw_setup->renders[i];
pvr_load_op_destroy(device, pAllocator, hw_render->client_data); pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
} }
pvr_destroy_renderpass_hwsetup(device, pass->hw_setup); pvr_destroy_renderpass_hwsetup(pAllocator ? pAllocator : &device->vk.alloc,
pass->hw_setup);
vk_object_base_finish(&pass->base); vk_object_base_finish(&pass->base);
vk_free2(&device->vk.alloc, pAllocator, pass); vk_free2(&device->vk.alloc, pAllocator, pass);
} }