pvr: Submit PR commands

This commit adds a partial render (PR) command to job submission.
For geom-only jobs we must always submit a PR command in case we
enter SPM. For now, for geom+frag jobs, we'll also always submit
a PR command event.

Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24138>
This commit is contained in:
Karmjit Mahil
2023-06-26 11:52:39 +01:00
committed by Marge Bot
parent 3c9d1a6cfa
commit 3798f99c46
5 changed files with 182 additions and 41 deletions

View File

@@ -1459,6 +1459,22 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
job->pds_bgnd_reg_values);
}
/* TODO: In some cases a PR can be removed by storing to the color attachment
* and have the background object load directly from it instead of using the
* scratch buffer. In those cases we can also set this to "false" and avoid
* extra fw overhead.
*/
/* The scratch buffer is always needed and allocated to avoid data loss in
* case SPM is hit so set the flag unconditionally.
*/
job->requires_spm_scratch_buffer = true;
memcpy(job->pr_pbe_reg_words,
&framebuffer->spm_eot_state_per_render[0].pbe_reg_words,
sizeof(job->pbe_reg_words));
job->pr_pds_pixel_event_data_offset =
framebuffer->spm_eot_state_per_render[0].pixel_event_program_data_offset;
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
typed_memcpy(job->pds_pr_bgnd_reg_values,
@@ -1694,16 +1710,6 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
job->run_frag = true;
job->geometry_terminate = true;
/* TODO: In some cases a PR can be removed by storing to the color attachment
* and have the background object load directly from it instead of using the
* scratch buffer. In those cases we can also set this to "false" and avoid
* extra fw overhead.
*/
/* The scratch buffer is always needed and allocated to avoid data loss in
* case SPM is hit so set the flag unconditionally.
*/
job->requires_spm_scratch_buffer = true;
return VK_SUCCESS;
}

View File

@@ -1064,6 +1064,55 @@ pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
pvr_geom_state_flags_init(job, &state->flags);
}
/**
 * \brief Returns the byte offset into the fragment KM stream at which the PBE
 * register words start.
 *
 * The offset is the combined length of every command summed below, with
 * CR_FB_CDC_ZLS only counted on devices that have the
 * requires_fb_cdc_zls_setup feature.
 *
 * \param[in] dev_info Device info used for the feature query.
 * \return Offset in bytes.
 */
static inline uint32_t pvr_frag_km_stream_pbe_reg_words_offset(
   const struct pvr_device_info *const dev_info)
{
   /* Length, in dwords, of everything counted ahead of the PBE words. */
   uint32_t dwords = pvr_cmd_length(KMD_STREAM_HDR) +
                     pvr_cmd_length(CR_ISP_SCISSOR_BASE) +
                     pvr_cmd_length(CR_ISP_DBIAS_BASE) +
                     pvr_cmd_length(CR_ISP_OCLQRY_BASE) +
                     pvr_cmd_length(CR_ISP_ZLSCTL) +
                     pvr_cmd_length(CR_ISP_ZLOAD_BASE) +
                     pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);

   /* This word is only accounted for on devices with the feature. */
   if (PVR_HAS_FEATURE(dev_info, requires_fb_cdc_zls_setup))
      dwords += pvr_cmd_length(CR_FB_CDC_ZLS);

   return PVR_DW_TO_BYTES(dwords);
}
#define DWORDS_PER_U64 2
/**
 * \brief Returns the byte offset into the fragment KM stream at which the
 * CR_EVENT_PIXEL_PDS_DATA word is packed.
 *
 * The offset is built up from the start of the PBE register words plus the
 * length of every entry counted below. KMD_STREAM_PIXEL_PHANTOM is only
 * counted on devices that have the cluster_grouping feature.
 *
 * \param[in] dev_info Device info used for the feature query.
 * \return Offset in bytes.
 */
static inline uint32_t pvr_frag_km_stream_pds_eot_data_addr_offset(
   const struct pvr_device_info *const dev_info)
{
   /* The helper reports bytes, whereas the running total here is in dwords. */
   const uint32_t pbe_words_start_dw =
      pvr_frag_km_stream_pbe_reg_words_offset(dev_info) / 4U;
   const uint32_t pbe_words_dw =
      PVR_MAX_COLOR_ATTACHMENTS * ROGUE_NUM_PBESTATE_REG_WORDS * DWORDS_PER_U64;
   const uint32_t pds_bgnd_words_dw =
      ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;

   uint32_t dwords = pbe_words_start_dw + pbe_words_dw;

   dwords += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);

   /* Two back-to-back sets of PDS background words.
    * NOTE(review): presumably the regular and the PR background register
    * values; confirm against the stream packing code.
    */
   dwords += pds_bgnd_words_dw;
   dwords += pds_bgnd_words_dw;

   dwords += PVRX(KMD_STREAM_USC_CLEAR_REGISTER_COUNT) *
             pvr_cmd_length(CR_USC_CLEAR_REGISTER);
   dwords += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL) +
             pvr_cmd_length(CR_ISP_BGOBJDEPTH) +
             pvr_cmd_length(CR_ISP_BGOBJVALS) +
             pvr_cmd_length(CR_ISP_AA) +
             pvr_cmd_length(CR_ISP_CTL) +
             pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);

   /* This entry is only accounted for on devices with the feature. */
   if (PVR_HAS_FEATURE(dev_info, cluster_grouping))
      dwords += pvr_cmd_length(KMD_STREAM_PIXEL_PHANTOM);

   dwords += pvr_cmd_length(KMD_STREAM_VIEW_IDX);

   return PVR_DW_TO_BYTES(dwords);
}
static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
struct pvr_winsys_fragment_state *state)
@@ -1197,7 +1246,11 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
stream_ptr += pvr_cmd_length(CR_FB_CDC_ZLS);
}
#define DWORDS_PER_U64 2
/* Make sure that the pvr_frag_km_...() function is returning the correct
* offset.
*/
assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream ==
pvr_frag_km_stream_pbe_reg_words_offset(dev_info));
STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words) == PVR_MAX_COLOR_ATTACHMENTS);
STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words[0]) ==
@@ -1338,6 +1391,12 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
/* clang-format on */
stream_ptr += pvr_cmd_length(KMD_STREAM_VIEW_IDX);
/* Make sure that the pvr_frag_km_...() function is returning the correct
* offset.
*/
assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream ==
pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info));
pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_DATA, value) {
value.addr = PVR_DEV_ADDR(job->pds_pixel_event_data_offset);
}
@@ -1388,6 +1447,8 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
}
}
#undef DWORDS_PER_U64
static void
pvr_frag_state_stream_ext_init(struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
@@ -1452,6 +1513,53 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
pvr_frag_state_flags_init(job, &state->flags);
}
/**
* \brief Sets up the fragment state for a Partial Render (PR) based on the
* state for a normal fragment job.
*
* The state of a fragment PR is almost the same as of that for a normal
* fragment job apart the PBE words and the EOT program, both of which are
* necessary for the render to use the SPM scratch buffer instead of the final
* render targets.
*
* By basing the fragment PR state on that of a normal fragment state,
* repacking of the same words can be avoided as we end up mostly doing copies
* instead.
*/
static void pvr_render_job_ws_fragment_pr_init_based_on_fragment_state(
const struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
struct vk_sync *wait,
struct pvr_winsys_fragment_state *frag,
struct pvr_winsys_fragment_state *state)
{
const struct pvr_device_info *const dev_info =
&ctx->device->pdevice->dev_info;
const uint32_t pbe_reg_byte_offset =
pvr_frag_km_stream_pbe_reg_words_offset(dev_info);
const uint32_t eot_data_addr_byte_offset =
pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info);
/* Massive copy :( */
*state = *frag;
assert(state->fw_stream_len >=
pbe_reg_byte_offset + sizeof(job->pr_pbe_reg_words));
memcpy(&state->fw_stream[pbe_reg_byte_offset],
job->pr_pbe_reg_words,
sizeof(job->pr_pbe_reg_words));
/* TODO: Update this when csbgen is byte instead of dword granular. */
assert(state->fw_stream_len >=
eot_data_addr_byte_offset +
PVR_DW_TO_BYTES(pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA)));
pvr_csb_pack ((uint32_t *)&state->fw_stream[eot_data_addr_byte_offset],
CR_EVENT_PIXEL_PDS_DATA,
eot_pds_data) {
eot_pds_data.addr = PVR_DEV_ADDR(job->pr_pds_pixel_event_data_offset);
}
}
static void pvr_render_job_ws_submit_info_init(
struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
@@ -1472,14 +1580,28 @@ static void pvr_render_job_ws_submit_info_init(
wait_geom,
&submit_info->geometry);
if (job->run_frag) {
submit_info->run_frag = true;
submit_info->has_fragment_job = job->run_frag;
pvr_render_job_ws_fragment_state_init(ctx,
job,
wait_frag,
&submit_info->fragment);
}
/* TODO: Move the job setup from queue submit into cmd_buffer if possible. */
/* TODO: See if it's worth avoiding setting up the fragment state and setup
* the pr state directly if `!job->run_frag`. For now we'll always set it up.
*/
pvr_render_job_ws_fragment_state_init(ctx,
job,
wait_frag,
&submit_info->fragment);
/* TODO: In some cases we could eliminate the pr and use the frag directly in
* case we enter SPM. There's likely some performance improvement to be had
* there. For now we'll always setup the pr.
*/
pvr_render_job_ws_fragment_pr_init_based_on_fragment_state(
ctx,
job,
wait_frag,
&submit_info->fragment,
&submit_info->fragment_pr);
}
VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,

View File

@@ -86,6 +86,7 @@ struct pvr_render_job {
};
uint32_t pds_pixel_event_data_offset;
uint32_t pr_pds_pixel_event_data_offset;
pvr_dev_addr_t ctrl_stream_addr;
@@ -147,6 +148,8 @@ struct pvr_render_job {
"Cannot store both PBESTATE_REG_WORD{0,1}");
uint64_t pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS]
[ROGUE_NUM_PBESTATE_REG_WORDS];
uint64_t pr_pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS]
[ROGUE_NUM_PBESTATE_REG_WORDS];
static_assert(pvr_cmd_length(CR_PDS_BGRND0_BASE) == 2,
"CR_PDS_BGRND0_BASE cannot be stored in uint64_t");

View File

@@ -314,9 +314,7 @@ struct pvr_winsys_render_submit_info {
uint32_t frame_num;
uint32_t job_num;
/* FIXME: should this be flags instead? */
bool run_frag;
bool has_fragment_job;
struct pvr_winsys_geometry_state {
/* Firmware stream buffer. This is the maximum possible size taking into
@@ -351,7 +349,7 @@ struct pvr_winsys_render_submit_info {
} flags;
struct vk_sync *wait;
} fragment;
} fragment, fragment_pr;
};
struct pvr_winsys_ops {

View File

@@ -879,17 +879,17 @@ static void pvr_srv_fragment_cmd_ext_stream_load(
assert((const uint8_t *)ext_stream_ptr - stream == stream_len);
}
static void pvr_srv_fragment_cmd_init(
const struct pvr_winsys_render_submit_info *submit_info,
struct rogue_fwif_cmd_3d *cmd,
const struct pvr_device_info *dev_info)
static void
pvr_srv_fragment_cmd_init(struct rogue_fwif_cmd_3d *cmd,
const struct pvr_winsys_fragment_state *state,
const struct pvr_device_info *dev_info,
uint32_t frame_num)
{
const struct pvr_winsys_fragment_state *state = &submit_info->fragment;
uint32_t ext_stream_offset;
memset(cmd, 0, sizeof(*cmd));
cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
cmd->cmd_shared.cmn.frame_num = frame_num;
ext_stream_offset = pvr_srv_fragment_cmd_stream_load(cmd,
state->fw_stream,
@@ -944,7 +944,11 @@ VkResult pvr_srv_winsys_render_submit(
struct pvr_srv_sync *srv_signal_sync_frag;
struct rogue_fwif_cmd_ta geom_cmd;
struct rogue_fwif_cmd_3d frag_cmd;
struct rogue_fwif_cmd_3d frag_cmd = { 0 };
struct rogue_fwif_cmd_3d pr_cmd = { 0 };
uint8_t *frag_cmd_ptr = NULL;
uint32_t frag_cmd_size = 0;
uint32_t current_sync_value = sync_prim->value;
uint32_t geom_sync_update_value;
@@ -962,10 +966,20 @@ VkResult pvr_srv_winsys_render_submit(
pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd, dev_info);
if (submit_info->run_frag)
pvr_srv_fragment_cmd_init(submit_info, &frag_cmd, dev_info);
else
memset(&frag_cmd, 0, sizeof(frag_cmd));
pvr_srv_fragment_cmd_init(&pr_cmd,
&submit_info->fragment_pr,
dev_info,
submit_info->frame_num);
if (submit_info->has_fragment_job) {
pvr_srv_fragment_cmd_init(&frag_cmd,
&submit_info->fragment,
dev_info,
submit_info->frame_num);
frag_cmd_ptr = (uint8_t *)&frag_cmd;
frag_cmd_size = sizeof(frag_cmd);
}
if (submit_info->geometry.wait) {
struct pvr_srv_sync *srv_wait_sync =
@@ -1005,7 +1019,7 @@ VkResult pvr_srv_winsys_render_submit(
/* Geometry is always kicked. */
geom_sync_update_value = ++current_sync_value;
if (submit_info->run_frag) {
if (submit_info->has_fragment_job) {
frag_sync_update_count = 1;
frag_sync_update_value = ++current_sync_value;
}
@@ -1044,18 +1058,16 @@ VkResult pvr_srv_winsys_render_submit(
"FRAG",
sizeof(geom_cmd),
(uint8_t *)&geom_cmd,
/* Currently no support for PRs. */
0,
/* Currently no support for PRs. */
NULL,
sizeof(frag_cmd),
(uint8_t *)&frag_cmd,
sizeof(pr_cmd),
(uint8_t *)&pr_cmd,
frag_cmd_size,
frag_cmd_ptr,
submit_info->job_num,
/* Always kick the TA. */
true,
/* Always kick a PR. */
true,
submit_info->run_frag,
submit_info->has_fragment_job,
false,
0,
rt_data_handle,