pvr: Submit PR commands
This commit adds a partial render (PR) command to job submission. For geometry-only jobs we must always submit a PR command in case we enter SPM. For now, we'll also always submit a PR command for geom+frag jobs. Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Reviewed-by: Frank Binns <frank.binns@imgtec.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24138>
This commit is contained in:
@@ -1459,6 +1459,22 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
||||
job->pds_bgnd_reg_values);
|
||||
}
|
||||
|
||||
/* TODO: In some cases a PR can be removed by storing to the color attachment
|
||||
* and have the background object load directly from it instead of using the
|
||||
* scratch buffer. In those cases we can also set this to "false" and avoid
|
||||
* extra fw overhead.
|
||||
*/
|
||||
/* The scratch buffer is always needed and allocated to avoid data loss in
|
||||
* case SPM is hit so set the flag unconditionally.
|
||||
*/
|
||||
job->requires_spm_scratch_buffer = true;
|
||||
|
||||
memcpy(job->pr_pbe_reg_words,
|
||||
&framebuffer->spm_eot_state_per_render[0].pbe_reg_words,
|
||||
sizeof(job->pbe_reg_words));
|
||||
job->pr_pds_pixel_event_data_offset =
|
||||
framebuffer->spm_eot_state_per_render[0].pixel_event_program_data_offset;
|
||||
|
||||
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
|
||||
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||
typed_memcpy(job->pds_pr_bgnd_reg_values,
|
||||
@@ -1694,16 +1710,6 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
||||
job->run_frag = true;
|
||||
job->geometry_terminate = true;
|
||||
|
||||
/* TODO: In some cases a PR can be removed by storing to the color attachment
|
||||
* and have the background object load directly from it instead of using the
|
||||
* scratch buffer. In those cases we can also set this to "false" and avoid
|
||||
* extra fw overhead.
|
||||
*/
|
||||
/* The scratch buffer is always needed and allocated to avoid data loss in
|
||||
* case SPM is hit so set the flag unconditionally.
|
||||
*/
|
||||
job->requires_spm_scratch_buffer = true;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
@@ -1064,6 +1064,55 @@ pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
|
||||
pvr_geom_state_flags_init(job, &state->flags);
|
||||
}
|
||||
|
||||
static inline uint32_t pvr_frag_km_stream_pbe_reg_words_offset(
|
||||
const struct pvr_device_info *const dev_info)
|
||||
{
|
||||
uint32_t offset = 0;
|
||||
|
||||
offset += pvr_cmd_length(KMD_STREAM_HDR);
|
||||
offset += pvr_cmd_length(CR_ISP_SCISSOR_BASE);
|
||||
offset += pvr_cmd_length(CR_ISP_DBIAS_BASE);
|
||||
offset += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
|
||||
offset += pvr_cmd_length(CR_ISP_ZLSCTL);
|
||||
offset += pvr_cmd_length(CR_ISP_ZLOAD_BASE);
|
||||
offset += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);
|
||||
|
||||
if (PVR_HAS_FEATURE(dev_info, requires_fb_cdc_zls_setup))
|
||||
offset += pvr_cmd_length(CR_FB_CDC_ZLS);
|
||||
|
||||
return PVR_DW_TO_BYTES(offset);
|
||||
}
|
||||
|
||||
#define DWORDS_PER_U64 2
|
||||
|
||||
static inline uint32_t pvr_frag_km_stream_pds_eot_data_addr_offset(
|
||||
const struct pvr_device_info *const dev_info)
|
||||
{
|
||||
uint32_t offset = 0;
|
||||
|
||||
offset += pvr_frag_km_stream_pbe_reg_words_offset(dev_info) / 4U;
|
||||
offset +=
|
||||
PVR_MAX_COLOR_ATTACHMENTS * ROGUE_NUM_PBESTATE_REG_WORDS * DWORDS_PER_U64;
|
||||
offset += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);
|
||||
offset += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;
|
||||
offset += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;
|
||||
offset += PVRX(KMD_STREAM_USC_CLEAR_REGISTER_COUNT) *
|
||||
pvr_cmd_length(CR_USC_CLEAR_REGISTER);
|
||||
offset += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);
|
||||
offset += pvr_cmd_length(CR_ISP_BGOBJDEPTH);
|
||||
offset += pvr_cmd_length(CR_ISP_BGOBJVALS);
|
||||
offset += pvr_cmd_length(CR_ISP_AA);
|
||||
offset += pvr_cmd_length(CR_ISP_CTL);
|
||||
offset += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
|
||||
|
||||
if (PVR_HAS_FEATURE(dev_info, cluster_grouping))
|
||||
offset += pvr_cmd_length(KMD_STREAM_PIXEL_PHANTOM);
|
||||
|
||||
offset += pvr_cmd_length(KMD_STREAM_VIEW_IDX);
|
||||
|
||||
return PVR_DW_TO_BYTES(offset);
|
||||
}
|
||||
|
||||
static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
|
||||
struct pvr_render_job *job,
|
||||
struct pvr_winsys_fragment_state *state)
|
||||
@@ -1197,7 +1246,11 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
|
||||
stream_ptr += pvr_cmd_length(CR_FB_CDC_ZLS);
|
||||
}
|
||||
|
||||
#define DWORDS_PER_U64 2
|
||||
/* Make sure that the pvr_frag_km_...() function is returning the correct
|
||||
* offset.
|
||||
*/
|
||||
assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream ==
|
||||
pvr_frag_km_stream_pbe_reg_words_offset(dev_info));
|
||||
|
||||
STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words) == PVR_MAX_COLOR_ATTACHMENTS);
|
||||
STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words[0]) ==
|
||||
@@ -1338,6 +1391,12 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
|
||||
/* clang-format on */
|
||||
stream_ptr += pvr_cmd_length(KMD_STREAM_VIEW_IDX);
|
||||
|
||||
/* Make sure that the pvr_frag_km_...() function is returning the correct
|
||||
* offset.
|
||||
*/
|
||||
assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream ==
|
||||
pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info));
|
||||
|
||||
pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_DATA, value) {
|
||||
value.addr = PVR_DEV_ADDR(job->pds_pixel_event_data_offset);
|
||||
}
|
||||
@@ -1388,6 +1447,8 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
|
||||
}
|
||||
}
|
||||
|
||||
#undef DWORDS_PER_U64
|
||||
|
||||
static void
|
||||
pvr_frag_state_stream_ext_init(struct pvr_render_ctx *ctx,
|
||||
struct pvr_render_job *job,
|
||||
@@ -1452,6 +1513,53 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
|
||||
pvr_frag_state_flags_init(job, &state->flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets up the fragment state for a Partial Render (PR) based on the
|
||||
* state for a normal fragment job.
|
||||
*
|
||||
* The state of a fragment PR is almost the same as of that for a normal
|
||||
* fragment job apart the PBE words and the EOT program, both of which are
|
||||
* necessary for the render to use the SPM scratch buffer instead of the final
|
||||
* render targets.
|
||||
*
|
||||
* By basing the fragment PR state on that of a normal fragment state,
|
||||
* repacking of the same words can be avoided as we end up mostly doing copies
|
||||
* instead.
|
||||
*/
|
||||
static void pvr_render_job_ws_fragment_pr_init_based_on_fragment_state(
|
||||
const struct pvr_render_ctx *ctx,
|
||||
struct pvr_render_job *job,
|
||||
struct vk_sync *wait,
|
||||
struct pvr_winsys_fragment_state *frag,
|
||||
struct pvr_winsys_fragment_state *state)
|
||||
{
|
||||
const struct pvr_device_info *const dev_info =
|
||||
&ctx->device->pdevice->dev_info;
|
||||
const uint32_t pbe_reg_byte_offset =
|
||||
pvr_frag_km_stream_pbe_reg_words_offset(dev_info);
|
||||
const uint32_t eot_data_addr_byte_offset =
|
||||
pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info);
|
||||
|
||||
/* Massive copy :( */
|
||||
*state = *frag;
|
||||
|
||||
assert(state->fw_stream_len >=
|
||||
pbe_reg_byte_offset + sizeof(job->pr_pbe_reg_words));
|
||||
memcpy(&state->fw_stream[pbe_reg_byte_offset],
|
||||
job->pr_pbe_reg_words,
|
||||
sizeof(job->pr_pbe_reg_words));
|
||||
|
||||
/* TODO: Update this when csbgen is byte instead of dword granular. */
|
||||
assert(state->fw_stream_len >=
|
||||
eot_data_addr_byte_offset +
|
||||
PVR_DW_TO_BYTES(pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA)));
|
||||
pvr_csb_pack ((uint32_t *)&state->fw_stream[eot_data_addr_byte_offset],
|
||||
CR_EVENT_PIXEL_PDS_DATA,
|
||||
eot_pds_data) {
|
||||
eot_pds_data.addr = PVR_DEV_ADDR(job->pr_pds_pixel_event_data_offset);
|
||||
}
|
||||
}
|
||||
|
||||
static void pvr_render_job_ws_submit_info_init(
|
||||
struct pvr_render_ctx *ctx,
|
||||
struct pvr_render_job *job,
|
||||
@@ -1472,14 +1580,28 @@ static void pvr_render_job_ws_submit_info_init(
|
||||
wait_geom,
|
||||
&submit_info->geometry);
|
||||
|
||||
if (job->run_frag) {
|
||||
submit_info->run_frag = true;
|
||||
submit_info->has_fragment_job = job->run_frag;
|
||||
|
||||
pvr_render_job_ws_fragment_state_init(ctx,
|
||||
job,
|
||||
wait_frag,
|
||||
&submit_info->fragment);
|
||||
}
|
||||
/* TODO: Move the job setup from queue submit into cmd_buffer if possible. */
|
||||
|
||||
/* TODO: See if it's worth avoiding setting up the fragment state and setup
|
||||
* the pr state directly if `!job->run_frag`. For now we'll always set it up.
|
||||
*/
|
||||
pvr_render_job_ws_fragment_state_init(ctx,
|
||||
job,
|
||||
wait_frag,
|
||||
&submit_info->fragment);
|
||||
|
||||
/* TODO: In some cases we could eliminate the pr and use the frag directly in
|
||||
* case we enter SPM. There's likely some performance improvement to be had
|
||||
* there. For now we'll always setup the pr.
|
||||
*/
|
||||
pvr_render_job_ws_fragment_pr_init_based_on_fragment_state(
|
||||
ctx,
|
||||
job,
|
||||
wait_frag,
|
||||
&submit_info->fragment,
|
||||
&submit_info->fragment_pr);
|
||||
}
|
||||
|
||||
VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
|
||||
|
@@ -86,6 +86,7 @@ struct pvr_render_job {
|
||||
};
|
||||
|
||||
uint32_t pds_pixel_event_data_offset;
|
||||
uint32_t pr_pds_pixel_event_data_offset;
|
||||
|
||||
pvr_dev_addr_t ctrl_stream_addr;
|
||||
|
||||
@@ -147,6 +148,8 @@ struct pvr_render_job {
|
||||
"Cannot store both PBESTATE_REG_WORD{0,1}");
|
||||
uint64_t pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS]
|
||||
[ROGUE_NUM_PBESTATE_REG_WORDS];
|
||||
uint64_t pr_pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS]
|
||||
[ROGUE_NUM_PBESTATE_REG_WORDS];
|
||||
|
||||
static_assert(pvr_cmd_length(CR_PDS_BGRND0_BASE) == 2,
|
||||
"CR_PDS_BGRND0_BASE cannot be stored in uint64_t");
|
||||
|
@@ -314,9 +314,7 @@ struct pvr_winsys_render_submit_info {
|
||||
|
||||
uint32_t frame_num;
|
||||
uint32_t job_num;
|
||||
|
||||
/* FIXME: should this be flags instead? */
|
||||
bool run_frag;
|
||||
bool has_fragment_job;
|
||||
|
||||
struct pvr_winsys_geometry_state {
|
||||
/* Firmware stream buffer. This is the maximum possible size taking into
|
||||
@@ -351,7 +349,7 @@ struct pvr_winsys_render_submit_info {
|
||||
} flags;
|
||||
|
||||
struct vk_sync *wait;
|
||||
} fragment;
|
||||
} fragment, fragment_pr;
|
||||
};
|
||||
|
||||
struct pvr_winsys_ops {
|
||||
|
@@ -879,17 +879,17 @@ static void pvr_srv_fragment_cmd_ext_stream_load(
|
||||
assert((const uint8_t *)ext_stream_ptr - stream == stream_len);
|
||||
}
|
||||
|
||||
static void pvr_srv_fragment_cmd_init(
|
||||
const struct pvr_winsys_render_submit_info *submit_info,
|
||||
struct rogue_fwif_cmd_3d *cmd,
|
||||
const struct pvr_device_info *dev_info)
|
||||
static void
|
||||
pvr_srv_fragment_cmd_init(struct rogue_fwif_cmd_3d *cmd,
|
||||
const struct pvr_winsys_fragment_state *state,
|
||||
const struct pvr_device_info *dev_info,
|
||||
uint32_t frame_num)
|
||||
{
|
||||
const struct pvr_winsys_fragment_state *state = &submit_info->fragment;
|
||||
uint32_t ext_stream_offset;
|
||||
|
||||
memset(cmd, 0, sizeof(*cmd));
|
||||
|
||||
cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
|
||||
cmd->cmd_shared.cmn.frame_num = frame_num;
|
||||
|
||||
ext_stream_offset = pvr_srv_fragment_cmd_stream_load(cmd,
|
||||
state->fw_stream,
|
||||
@@ -944,7 +944,11 @@ VkResult pvr_srv_winsys_render_submit(
|
||||
struct pvr_srv_sync *srv_signal_sync_frag;
|
||||
|
||||
struct rogue_fwif_cmd_ta geom_cmd;
|
||||
struct rogue_fwif_cmd_3d frag_cmd;
|
||||
struct rogue_fwif_cmd_3d frag_cmd = { 0 };
|
||||
struct rogue_fwif_cmd_3d pr_cmd = { 0 };
|
||||
|
||||
uint8_t *frag_cmd_ptr = NULL;
|
||||
uint32_t frag_cmd_size = 0;
|
||||
|
||||
uint32_t current_sync_value = sync_prim->value;
|
||||
uint32_t geom_sync_update_value;
|
||||
@@ -962,10 +966,20 @@ VkResult pvr_srv_winsys_render_submit(
|
||||
|
||||
pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd, dev_info);
|
||||
|
||||
if (submit_info->run_frag)
|
||||
pvr_srv_fragment_cmd_init(submit_info, &frag_cmd, dev_info);
|
||||
else
|
||||
memset(&frag_cmd, 0, sizeof(frag_cmd));
|
||||
pvr_srv_fragment_cmd_init(&pr_cmd,
|
||||
&submit_info->fragment_pr,
|
||||
dev_info,
|
||||
submit_info->frame_num);
|
||||
|
||||
if (submit_info->has_fragment_job) {
|
||||
pvr_srv_fragment_cmd_init(&frag_cmd,
|
||||
&submit_info->fragment,
|
||||
dev_info,
|
||||
submit_info->frame_num);
|
||||
|
||||
frag_cmd_ptr = (uint8_t *)&frag_cmd;
|
||||
frag_cmd_size = sizeof(frag_cmd);
|
||||
}
|
||||
|
||||
if (submit_info->geometry.wait) {
|
||||
struct pvr_srv_sync *srv_wait_sync =
|
||||
@@ -1005,7 +1019,7 @@ VkResult pvr_srv_winsys_render_submit(
|
||||
/* Geometry is always kicked */
|
||||
geom_sync_update_value = ++current_sync_value;
|
||||
|
||||
if (submit_info->run_frag) {
|
||||
if (submit_info->has_fragment_job) {
|
||||
frag_sync_update_count = 1;
|
||||
frag_sync_update_value = ++current_sync_value;
|
||||
}
|
||||
@@ -1044,18 +1058,16 @@ VkResult pvr_srv_winsys_render_submit(
|
||||
"FRAG",
|
||||
sizeof(geom_cmd),
|
||||
(uint8_t *)&geom_cmd,
|
||||
/* Currently no support for PRs. */
|
||||
0,
|
||||
/* Currently no support for PRs. */
|
||||
NULL,
|
||||
sizeof(frag_cmd),
|
||||
(uint8_t *)&frag_cmd,
|
||||
sizeof(pr_cmd),
|
||||
(uint8_t *)&pr_cmd,
|
||||
frag_cmd_size,
|
||||
frag_cmd_ptr,
|
||||
submit_info->job_num,
|
||||
/* Always kick the TA. */
|
||||
true,
|
||||
/* Always kick a PR. */
|
||||
true,
|
||||
submit_info->run_frag,
|
||||
submit_info->has_fragment_job,
|
||||
false,
|
||||
0,
|
||||
rt_data_handle,
|
||||
|
Reference in New Issue
Block a user