radv: pre-calculate viewport transforms

This requires more storage in the viewport struct, but it avoids
repeatedly recalculating the same transform (e.g. when a meta
operation occurs), which can save about 5% CPU in some cases.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11839>
This commit is contained in:
Mike Blumenkrantz
2021-07-09 14:30:43 -04:00
committed by Marge Bot
parent 1e13cb1965
commit a2ef92d7a5
4 changed files with 23 additions and 13 deletions

View File

@@ -138,6 +138,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy
if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
src->viewport.count * sizeof(VkViewport))) {
typed_memcpy(dest->viewport.viewports, src->viewport.viewports, src->viewport.count);
typed_memcpy(dest->viewport.xform, src->viewport.xform, src->viewport.count);
dest_mask |= RADV_DYNAMIC_VIEWPORT;
}
}
@@ -1376,27 +1377,23 @@ radv_emit_viewport(struct radv_cmd_buffer *cmd_buffer)
int i;
const unsigned count = viewport->count;
const unsigned first_vp = 0;
const VkViewport *viewports = viewport->viewports;
assert(count);
radeon_set_context_reg_seq(cmd_buffer->cs, R_02843C_PA_CL_VPORT_XSCALE + first_vp * 4 * 6, count * 6);
for (i = 0; i < count; i++) {
float scale[3], translate[3];
radv_get_viewport_xform(&viewports[i], scale, translate);
radeon_emit(cmd_buffer->cs, fui(scale[0]));
radeon_emit(cmd_buffer->cs, fui(translate[0]));
radeon_emit(cmd_buffer->cs, fui(scale[1]));
radeon_emit(cmd_buffer->cs, fui(translate[1]));
radeon_emit(cmd_buffer->cs, fui(scale[2]));
radeon_emit(cmd_buffer->cs, fui(translate[2]));
radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].scale[0]));
radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].translate[0]));
radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].scale[1]));
radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].translate[1]));
radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].scale[2]));
radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].translate[2]));
}
radeon_set_context_reg_seq(cmd_buffer->cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + first_vp * 4 * 2, count * 2);
/* Start the PA_SC_VPORT_ZMIN/ZMAX register sequence (two dwords per viewport)
 * on the command buffer's own command stream. */
radeon_set_context_reg_seq(cmd_buffer->cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + first_vp * 4 * 2, count * 2);
for (i = 0; i < count; i++) {
float zmin = MIN2(viewports[i].minDepth, viewports[i].maxDepth);
float zmax = MAX2(viewports[i].minDepth, viewports[i].maxDepth);
float zmin = MIN2(viewport->viewports[i].minDepth, viewport->viewports[i].maxDepth);
float zmax = MAX2(viewport->viewports[i].minDepth, viewport->viewports[i].maxDepth);
radeon_emit(cmd_buffer->cs, fui(zmin));
radeon_emit(cmd_buffer->cs, fui(zmax));
}
@@ -4416,6 +4413,8 @@ radv_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint3
memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports,
viewportCount * sizeof(*pViewports));
/* pViewports holds viewportCount entries starting at index 0; only the
 * destination slot is offset by firstViewport (same layout as the memcpy
 * into viewports + firstViewport). */
for (unsigned i = 0; i < viewportCount; i++) {
radv_get_viewport_xform(&pViewports[i], state->dynamic.viewport.xform[firstViewport + i].scale, state->dynamic.viewport.xform[firstViewport + i].translate);
}
state->dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT;
}

View File

@@ -57,6 +57,8 @@ radv_meta_save(struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_
state->viewport.count = cmd_buffer->state.dynamic.viewport.count;
typed_memcpy(state->viewport.viewports, cmd_buffer->state.dynamic.viewport.viewports,
MAX_VIEWPORTS);
typed_memcpy(state->viewport.xform, cmd_buffer->state.dynamic.viewport.xform,
MAX_VIEWPORTS);
/* Save all scissors. */
state->scissor.count = cmd_buffer->state.dynamic.scissor.count;
@@ -149,6 +151,8 @@ radv_meta_restore(const struct radv_meta_saved_state *state, struct radv_cmd_buf
cmd_buffer->state.dynamic.viewport.count = state->viewport.count;
typed_memcpy(cmd_buffer->state.dynamic.viewport.viewports, state->viewport.viewports,
MAX_VIEWPORTS);
typed_memcpy(cmd_buffer->state.dynamic.viewport.xform, state->viewport.xform,
MAX_VIEWPORTS);
/* Restore all scissors. */
cmd_buffer->state.dynamic.scissor.count = state->scissor.count;

View File

@@ -1540,6 +1540,9 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
if (states & RADV_DYNAMIC_VIEWPORT) {
typed_memcpy(dynamic->viewport.viewports, pCreateInfo->pViewportState->pViewports,
pCreateInfo->pViewportState->viewportCount);
for (unsigned i = 0; i < dynamic->viewport.count; i++)
radv_get_viewport_xform(&dynamic->viewport.viewports[i],
dynamic->viewport.xform[i].scale, dynamic->viewport.xform[i].translate);
}
}

View File

@@ -1129,6 +1129,10 @@ struct radv_streamout_state {
/* Viewport state: the viewports themselves plus the scale/translate
 * transform pre-computed for each of them. */
struct radv_viewport_state {
/* Number of viewports in use; radv_emit_viewport() emits this many entries. */
uint32_t count;
VkViewport viewports[MAX_VIEWPORTS];
/* Per-viewport transform, computed by radv_get_viewport_xform() from the
 * matching viewports[] entry and stored here so that emission can read it
 * directly instead of recomputing it each time. */
struct {
float scale[3];
float translate[3];
} xform[MAX_VIEWPORTS];
};
struct radv_scissor_state {