tu: Lazily init VSC to fix dynamic rendering in secondary cmdbufs
Dynamic renderpasses recorded in secondary command buffers need valid vsc_prim_strm_pitch and vsc_draw_strm_pitch values, and a correct VSC BO. The easiest way to provide them is to lazily initialize VSC when it is first needed, rather than at every cmdbuf initialization.

Fixes CTS tests (when running with TU_DEBUG=gmem,forcebin):
dEQP-VK.draw.dynamic_rendering.complete_secondary_cmd_buff.*

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18996>
This commit is contained in:

committed by
Marge Bot

parent
e70a2148e5
commit
4eba6d71a8
@@ -83,6 +83,53 @@ tu6_lazy_emit_tessfactor_addr(struct tu_cmd_buffer *cmd)
|
||||
cmd->state.tessfactor_addr_set = true;
|
||||
}
|
||||
|
||||
/* Emit the VSC stream address registers into `cs` and record on `cmd` that
 * this has been done.
 *
 * Steps, in order:
 *  - under dev->mutex, read the overflow counters from the device's global BO
 *    and grow the device-wide draw/prim stream pitches when a counter has
 *    reached the current pitch;
 *  - copy the resulting device pitches into `cmd`;
 *  - obtain a scratch BO large enough for all prim and draw streams plus a
 *    trailing MAX_VSC_PIPES * 4 bytes;
 *  - emit VSC_DRAW_STRM_SIZE_ADDRESS, VSC_PRIM_STRM_ADDRESS and
 *    VSC_DRAW_STRM_ADDRESS pointing into that BO;
 *  - set cmd->vsc_initialized.
 *
 * cmd: command buffer whose vsc_*_strm_pitch and vsc_initialized are written.
 * cs:  command stream that receives the register writes.
 */
static void
|
||||
tu6_lazy_emit_vsc(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
struct tu_device *dev = cmd->device;
|
||||
|
||||
/* VSC buffers:
|
||||
* use vsc pitches from the largest values used so far with this device
|
||||
* if there hasn't been overflow, there will already be a scratch bo
|
||||
* allocated for these sizes
|
||||
*
|
||||
* if overflow is detected, the stream size is increased by 2x
|
||||
*/
|
||||
/* The lock covers both the read of the overflow counters and the update of
 * the device-wide pitches, up to the copy into cmd below.
 */
mtx_lock(&dev->mutex);
|
||||
|
||||
struct tu6_global *global = dev->global_bo->map;
|
||||
|
||||
uint32_t vsc_draw_overflow = global->vsc_draw_overflow;
|
||||
uint32_t vsc_prim_overflow = global->vsc_prim_overflow;
|
||||
|
||||
/* Doubling is applied to the pitch with VSC_PAD subtracted; VSC_PAD is then
 * added back, so the pad itself is not doubled.
 */
if (vsc_draw_overflow >= dev->vsc_draw_strm_pitch)
|
||||
dev->vsc_draw_strm_pitch = (dev->vsc_draw_strm_pitch - VSC_PAD) * 2 + VSC_PAD;
|
||||
|
||||
if (vsc_prim_overflow >= dev->vsc_prim_strm_pitch)
|
||||
dev->vsc_prim_strm_pitch = (dev->vsc_prim_strm_pitch - VSC_PAD) * 2 + VSC_PAD;
|
||||
|
||||
/* Snapshot the (possibly grown) device pitches into the command buffer while
 * still holding the lock.
 */
cmd->vsc_prim_strm_pitch = dev->vsc_prim_strm_pitch;
|
||||
cmd->vsc_draw_strm_pitch = dev->vsc_draw_strm_pitch;
|
||||
|
||||
mtx_unlock(&dev->mutex);
|
||||
|
||||
/* BO layout used by the register writes below:
 *   [0, prim_pitch * MAX_VSC_PIPES)        prim streams
 *   [prim_pitch * MAX_VSC_PIPES, size0)    draw streams
 *   [size0, size0 + MAX_VSC_PIPES * 4)     target of DRAW_STRM_SIZE_ADDRESS
 *                                          (presumably one 32-bit size per
 *                                          pipe -- TODO confirm)
 */
struct tu_bo *vsc_bo;
|
||||
uint32_t size0 = cmd->vsc_prim_strm_pitch * MAX_VSC_PIPES +
|
||||
cmd->vsc_draw_strm_pitch * MAX_VSC_PIPES;
|
||||
|
||||
/* NOTE(review): the result of tu_get_scratch_bo() is not checked here and
 * vsc_bo has no initializer -- confirm whether this call can fail.
 */
tu_get_scratch_bo(dev, size0 + MAX_VSC_PIPES * 4, &vsc_bo);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = vsc_bo, .bo_offset = size0));
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_PRIM_STRM_ADDRESS(.bo = vsc_bo));
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_DRAW_STRM_ADDRESS(.bo = vsc_bo,
|
||||
.bo_offset = cmd->vsc_prim_strm_pitch * MAX_VSC_PIPES));
|
||||
|
||||
/* Record that the VSC registers have been emitted for this command buffer. */
cmd->vsc_initialized = true;
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_flushes(struct tu_cmd_buffer *cmd_buffer,
|
||||
struct tu_cs *cs,
|
||||
@@ -951,45 +998,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR(.bo = dev->global_bo,
|
||||
.bo_offset = gb_offset(bcolor_builtin)));
|
||||
|
||||
/* VSC buffers:
|
||||
* use vsc pitches from the largest values used so far with this device
|
||||
* if there hasn't been overflow, there will already be a scratch bo
|
||||
* allocated for these sizes
|
||||
*
|
||||
* if overflow is detected, the stream size is increased by 2x
|
||||
*/
|
||||
mtx_lock(&dev->mutex);
|
||||
|
||||
struct tu6_global *global = dev->global_bo->map;
|
||||
|
||||
uint32_t vsc_draw_overflow = global->vsc_draw_overflow;
|
||||
uint32_t vsc_prim_overflow = global->vsc_prim_overflow;
|
||||
|
||||
if (vsc_draw_overflow >= dev->vsc_draw_strm_pitch)
|
||||
dev->vsc_draw_strm_pitch = (dev->vsc_draw_strm_pitch - VSC_PAD) * 2 + VSC_PAD;
|
||||
|
||||
if (vsc_prim_overflow >= dev->vsc_prim_strm_pitch)
|
||||
dev->vsc_prim_strm_pitch = (dev->vsc_prim_strm_pitch - VSC_PAD) * 2 + VSC_PAD;
|
||||
|
||||
cmd->vsc_prim_strm_pitch = dev->vsc_prim_strm_pitch;
|
||||
cmd->vsc_draw_strm_pitch = dev->vsc_draw_strm_pitch;
|
||||
|
||||
mtx_unlock(&dev->mutex);
|
||||
|
||||
struct tu_bo *vsc_bo;
|
||||
uint32_t size0 = cmd->vsc_prim_strm_pitch * MAX_VSC_PIPES +
|
||||
cmd->vsc_draw_strm_pitch * MAX_VSC_PIPES;
|
||||
|
||||
tu_get_scratch_bo(dev, size0 + MAX_VSC_PIPES * 4, &vsc_bo);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = vsc_bo, .bo_offset = size0));
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_PRIM_STRM_ADDRESS(.bo = vsc_bo));
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_DRAW_STRM_ADDRESS(.bo = vsc_bo,
|
||||
.bo_offset = cmd->vsc_prim_strm_pitch * MAX_VSC_PIPES));
|
||||
|
||||
tu_cs_sanity_check(cs);
|
||||
}
|
||||
|
||||
@@ -1378,6 +1386,10 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_GMEM);
|
||||
|
||||
if (use_hw_binning(cmd)) {
|
||||
if (!cmd->vsc_initialized) {
|
||||
tu6_lazy_emit_vsc(cmd, cs);
|
||||
}
|
||||
|
||||
tu6_emit_bin_size(cs, tiling->tile0.width, tiling->tile0.height,
|
||||
A6XX_RB_BIN_CONTROL_RENDER_MODE(BINNING_PASS) |
|
||||
A6XX_RB_BIN_CONTROL_LRZ_FEEDBACK_ZMODE_MASK(0x6));
|
||||
@@ -1712,6 +1724,7 @@ tu_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
|
||||
u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->trace_context);
|
||||
|
||||
cmd_buffer->state.max_vbs_bound = 0;
|
||||
cmd_buffer->vsc_initialized = false;
|
||||
|
||||
cmd_buffer->status = TU_CMD_BUFFER_STATUS_INITIAL;
|
||||
}
|
||||
|
@@ -588,6 +588,7 @@ struct tu_cmd_buffer
|
||||
|
||||
uint32_t vsc_draw_strm_pitch;
|
||||
uint32_t vsc_prim_strm_pitch;
|
||||
bool vsc_initialized;
|
||||
};
|
||||
VK_DEFINE_HANDLE_CASTS(tu_cmd_buffer, vk.base, VkCommandBuffer,
|
||||
VK_OBJECT_TYPE_COMMAND_BUFFER)
|
||||
|
@@ -201,8 +201,6 @@ tu_insert_dynamic_cmdbufs(struct tu_device *dev,
|
||||
* because we skip tu6_emit_hw().
|
||||
*/
|
||||
cmd_buffer->state.ccu_state = old_cmds[i]->state.ccu_state;
|
||||
cmd_buffer->vsc_draw_strm_pitch = old_cmds[i]->vsc_draw_strm_pitch;
|
||||
cmd_buffer->vsc_prim_strm_pitch = old_cmds[i]->vsc_prim_strm_pitch;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user