tu: Disable LRZ properly on A7XX
LRZ wasn't entirely disabled because the register `A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO` was not being set to `0` in all circumstances. This register affects rendering even when LRZ is disabled, so it needs to be set to `0` until LRZ is properly implemented. Signed-off-by: Mark Collins <mark@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26461>
This commit is contained in:
@@ -2340,7 +2340,7 @@ to upconvert to 32b float internally?
|
||||
<!-- A bit tentative but it's a color and it is followed by LRZ_CLEAR -->
|
||||
<reg32 offset="0x8111" name="GRAS_LRZ_CLEAR_DEPTH_F32" type="float" variants="A7XX-"/>
|
||||
|
||||
<reg32 offset="0x8113" name="GRAS_UNKNOWN_8113" variants="A7XX-" usage="rp_blit"/>
|
||||
<reg32 offset="0x8113" name="GRAS_LRZ_DEPTH_BUFFER_INFO" variants="A7XX-" usage="rp_blit"/>
|
||||
|
||||
<!-- Always written together and always equal 09510840 00000a62 -->
|
||||
<reg32 offset="0x8120" name="GRAS_UNKNOWN_8120" variants="A7XX-" usage="cmd"/>
|
||||
|
@@ -1317,6 +1317,7 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd,
|
||||
r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer,
|
||||
enum pipe_format src_format)
|
||||
@@ -1338,6 +1339,9 @@ r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer,
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
|
||||
tu_cs_image_flag_ref(cs, iview, layer);
|
||||
|
||||
if (CHIP >= A7XX)
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO(0));
|
||||
|
||||
/* Use color format from RB_MRT_BUF_INFO. This register is relevant for
|
||||
* FMT6_NV12_Y.
|
||||
*/
|
||||
@@ -1657,7 +1661,7 @@ static const struct blit_ops r3d_ops = {
|
||||
.clear_value = r3d_clear_value,
|
||||
.src = r3d_src,
|
||||
.src_buffer = r3d_src_buffer<CHIP>,
|
||||
.dst = r3d_dst,
|
||||
.dst = r3d_dst<CHIP>,
|
||||
.dst_depth = r3d_dst_depth,
|
||||
.dst_stencil = r3d_dst_stencil,
|
||||
.dst_buffer = r3d_dst_buffer,
|
||||
@@ -2015,7 +2019,7 @@ tu_CmdBlitImage2(VkCommandBuffer commandBuffer,
|
||||
}
|
||||
|
||||
if (dst_image->lrz_height) {
|
||||
tu_disable_lrz(cmd, &cmd->cs, dst_image);
|
||||
tu_disable_lrz<CHIP>(cmd, &cmd->cs, dst_image);
|
||||
}
|
||||
}
|
||||
TU_GENX(tu_CmdBlitImage2);
|
||||
@@ -2133,7 +2137,7 @@ tu_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
|
||||
pCopyBufferToImageInfo->pRegions + i);
|
||||
|
||||
if (dst_image->lrz_height) {
|
||||
tu_disable_lrz(cmd, &cmd->cs, dst_image);
|
||||
tu_disable_lrz<CHIP>(cmd, &cmd->cs, dst_image);
|
||||
}
|
||||
}
|
||||
TU_GENX(tu_CmdCopyBufferToImage2);
|
||||
@@ -2477,7 +2481,7 @@ tu_CmdCopyImage2(VkCommandBuffer commandBuffer,
|
||||
}
|
||||
|
||||
if (dst_image->lrz_height) {
|
||||
tu_disable_lrz(cmd, &cmd->cs, dst_image);
|
||||
tu_disable_lrz<CHIP>(cmd, &cmd->cs, dst_image);
|
||||
}
|
||||
}
|
||||
TU_GENX(tu_CmdCopyImage2);
|
||||
@@ -2840,7 +2844,7 @@ tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
|
||||
clear_image<CHIP>(cmd, image, (const VkClearValue*) pDepthStencil, range, range->aspectMask);
|
||||
}
|
||||
|
||||
tu_lrz_clear_depth_image(cmd, image, pDepthStencil, rangeCount, pRanges);
|
||||
tu_lrz_clear_depth_image<CHIP>(cmd, image, pDepthStencil, rangeCount, pRanges);
|
||||
}
|
||||
TU_GENX(tu_CmdClearDepthStencilImage);
|
||||
|
||||
@@ -3799,7 +3803,7 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
|
||||
r3d_dst_stencil(cs, iview, layer);
|
||||
}
|
||||
} else {
|
||||
r3d_dst(cs, &iview->view, layer, src_format);
|
||||
r3d_dst<CHIP>(cs, &iview->view, layer, src_format);
|
||||
}
|
||||
|
||||
r3d_src_gmem<CHIP>(cmd, cs, iview, src_format, dst_format, gmem_offset, cpp);
|
||||
|
@@ -1711,7 +1711,7 @@ tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
{
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
|
||||
tu_lrz_sysmem_begin(cmd, cs);
|
||||
tu_lrz_sysmem_begin<CHIP>(cmd, cs);
|
||||
|
||||
assert(fb->width > 0 && fb->height > 0);
|
||||
tu6_emit_window_scissor(cs, 0, 0, fb->width - 1, fb->height - 1);
|
||||
@@ -1730,9 +1730,7 @@ tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
tu_cs_emit_regs(cs,
|
||||
A7XX_RB_UNKNOWN_8E06(cmd->device->physical_device->info->a6xx.magic.RB_UNKNOWN_8E06));
|
||||
|
||||
/* These three have something to do with lrz/depth */
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_UNKNOWN_8007(0x0));
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_UNKNOWN_8113(0x4));
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8110(0x2));
|
||||
tu_cs_emit_regs(cs, A7XX_RB_UNKNOWN_8E09(0x4));
|
||||
@@ -1787,7 +1785,7 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
{
|
||||
struct tu_physical_device *phys_dev = cmd->device->physical_device;
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
tu_lrz_tiling_begin(cmd, cs);
|
||||
tu_lrz_tiling_begin<CHIP>(cmd, cs);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
|
||||
tu_cs_emit(cs, 0x0);
|
||||
@@ -1799,8 +1797,6 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
A7XX_RB_UNKNOWN_8E06(0x0));
|
||||
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_UNKNOWN_8007(0x0));
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_UNKNOWN_810B(0x0));
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_UNKNOWN_8113(0x0));
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8110(0x2));
|
||||
tu_cs_emit_regs(cs, A7XX_RB_UNKNOWN_8E09(0x4));
|
||||
@@ -1923,7 +1919,7 @@ tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
|
||||
tu_cs_emit_call(cs, &cmd->draw_epilogue_cs);
|
||||
|
||||
tu_lrz_tiling_end(cmd, cs);
|
||||
tu_lrz_tiling_end<CHIP>(cmd, cs);
|
||||
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_BLIT_CACHE);
|
||||
|
||||
@@ -4120,7 +4116,7 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
|
||||
if (pass->subpasses[0].feedback_invalidate)
|
||||
cmd->state.renderpass_cache.flush_bits |= TU_CMD_FLAG_CACHE_INVALIDATE;
|
||||
|
||||
tu_lrz_begin_renderpass(cmd);
|
||||
tu_lrz_begin_renderpass<CHIP>(cmd);
|
||||
|
||||
cmd->trace_renderpass_start = u_trace_end_iterator(&cmd->trace);
|
||||
|
||||
@@ -4237,7 +4233,7 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer,
|
||||
if (resuming)
|
||||
tu_lrz_begin_resumed_renderpass(cmd);
|
||||
else
|
||||
tu_lrz_begin_renderpass(cmd);
|
||||
tu_lrz_begin_renderpass<CHIP>(cmd);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -51,6 +51,7 @@
|
||||
* before using LRZ.
|
||||
*/
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu6_emit_lrz_buffer(struct tu_cs *cs, struct tu_image *depth_image)
|
||||
{
|
||||
@@ -59,6 +60,10 @@ tu6_emit_lrz_buffer(struct tu_cs *cs, struct tu_image *depth_image)
|
||||
A6XX_GRAS_LRZ_BUFFER_BASE(0),
|
||||
A6XX_GRAS_LRZ_BUFFER_PITCH(0),
|
||||
A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(0));
|
||||
|
||||
if (CHIP >= A7XX)
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO(0));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -71,6 +76,10 @@ tu6_emit_lrz_buffer(struct tu_cs *cs, struct tu_image *depth_image)
|
||||
A6XX_GRAS_LRZ_BUFFER_BASE(.qword = lrz_iova),
|
||||
A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = depth_image->lrz_pitch),
|
||||
A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(.qword = lrz_fc_iova));
|
||||
|
||||
if (CHIP >= A7XX)
|
||||
// TODO: Figure out the correct value to set here.
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO(0));
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -215,6 +224,7 @@ tu_lrz_begin_resumed_renderpass(struct tu_cmd_buffer *cmd)
|
||||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_lrz_begin_renderpass(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
@@ -238,7 +248,7 @@ tu_lrz_begin_renderpass(struct tu_cmd_buffer *cmd)
|
||||
|
||||
for (unsigned i = 0; i < pass->attachment_count; i++) {
|
||||
struct tu_image *image = cmd->state.attachments[i]->image;
|
||||
tu_disable_lrz(cmd, &cmd->cs, image);
|
||||
tu_disable_lrz<CHIP>(cmd, &cmd->cs, image);
|
||||
}
|
||||
|
||||
/* We need a valid LRZ fast-clear base, in case the render pass contents
|
||||
@@ -254,9 +264,10 @@ tu_lrz_begin_renderpass(struct tu_cmd_buffer *cmd)
|
||||
tu_lrz_begin_resumed_renderpass(cmd);
|
||||
|
||||
if (!cmd->state.lrz.valid) {
|
||||
tu6_emit_lrz_buffer(&cmd->cs, NULL);
|
||||
tu6_emit_lrz_buffer<CHIP>(&cmd->cs, NULL);
|
||||
}
|
||||
}
|
||||
TU_GENX(tu_lrz_begin_renderpass);
|
||||
|
||||
void
|
||||
tu_lrz_begin_secondary_cmdbuf(struct tu_cmd_buffer *cmd)
|
||||
@@ -269,6 +280,7 @@ tu_lrz_begin_secondary_cmdbuf(struct tu_cmd_buffer *cmd)
|
||||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
@@ -282,7 +294,7 @@ tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
|
||||
struct tu_lrz_state *lrz = &cmd->state.lrz;
|
||||
|
||||
tu6_emit_lrz_buffer(cs, lrz->image_view->image);
|
||||
tu6_emit_lrz_buffer<CHIP>(cs, lrz->image_view->image);
|
||||
|
||||
if (lrz->reuse_previous_state) {
|
||||
/* Reuse previous LRZ state, LRZ cache is assumed to be
|
||||
@@ -338,12 +350,14 @@ tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
}
|
||||
}
|
||||
}
|
||||
TU_GENX(tu_lrz_tiling_begin);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_lrz_tiling_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
if (cmd->state.lrz.fast_clear || cmd->state.lrz.gpu_dir_tracking) {
|
||||
tu6_emit_lrz_buffer(cs, cmd->state.lrz.image_view->image);
|
||||
tu6_emit_lrz_buffer<CHIP>(cs, cmd->state.lrz.image_view->image);
|
||||
|
||||
if (cmd->state.lrz.gpu_dir_tracking) {
|
||||
tu6_write_lrz_reg(cmd, &cmd->cs,
|
||||
@@ -373,7 +387,9 @@ tu_lrz_tiling_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
* reason to do such clear.
|
||||
*/
|
||||
}
|
||||
TU_GENX(tu_lrz_tiling_end);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_lrz_sysmem_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
@@ -387,12 +403,12 @@ tu_lrz_sysmem_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
struct tu_lrz_state *lrz = &cmd->state.lrz;
|
||||
|
||||
if (cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking) {
|
||||
tu_disable_lrz(cmd, cs, lrz->image_view->image);
|
||||
tu_disable_lrz<CHIP>(cmd, cs, lrz->image_view->image);
|
||||
/* Make sure depth view comparison will fail. */
|
||||
tu6_write_lrz_reg(cmd, cs,
|
||||
A6XX_GRAS_LRZ_DEPTH_VIEW(.dword = 0));
|
||||
} else {
|
||||
tu6_emit_lrz_buffer(cs, lrz->image_view->image);
|
||||
tu6_emit_lrz_buffer<CHIP>(cs, lrz->image_view->image);
|
||||
/* Even though we disable LRZ writes in sysmem mode - there is still
|
||||
* LRZ test, so LRZ should be cleared.
|
||||
*/
|
||||
@@ -408,6 +424,7 @@ tu_lrz_sysmem_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
}
|
||||
}
|
||||
}
|
||||
TU_GENX(tu_lrz_sysmem_begin);
|
||||
|
||||
void
|
||||
tu_lrz_sysmem_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
@@ -416,6 +433,7 @@ tu_lrz_sysmem_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
}
|
||||
|
||||
/* Disable LRZ outside of renderpass. */
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_disable_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
struct tu_image *image)
|
||||
@@ -426,11 +444,13 @@ tu_disable_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
if (!image->lrz_height)
|
||||
return;
|
||||
|
||||
tu6_emit_lrz_buffer(cs, image);
|
||||
tu6_emit_lrz_buffer<CHIP>(cs, image);
|
||||
tu6_disable_lrz_via_depth_view(cmd, cs);
|
||||
}
|
||||
TU_GENX(tu_disable_lrz);
|
||||
|
||||
/* Clear LRZ, used for out of renderpass depth clears. */
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
|
||||
struct tu_image *image,
|
||||
@@ -460,7 +480,7 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
|
||||
bool fast_clear = image->lrz_fc_size && (pDepthStencil->depth == 0.f ||
|
||||
pDepthStencil->depth == 1.f);
|
||||
|
||||
tu6_emit_lrz_buffer(&cmd->cs, image);
|
||||
tu6_emit_lrz_buffer<CHIP>(&cmd->cs, image);
|
||||
|
||||
tu6_write_lrz_reg(cmd, &cmd->cs, A6XX_GRAS_LRZ_DEPTH_VIEW(
|
||||
.base_layer = range->baseArrayLayer,
|
||||
@@ -481,6 +501,7 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
|
||||
tu6_clear_lrz<A6XX>(cmd, &cmd->cs, image, (const VkClearValue*) pDepthStencil);
|
||||
}
|
||||
}
|
||||
TU_GENX(tu_lrz_clear_depth_image);
|
||||
|
||||
void
|
||||
tu_lrz_disable_during_renderpass(struct tu_cmd_buffer *cmd)
|
||||
|
@@ -45,10 +45,12 @@ struct tu_lrz_state
|
||||
void
|
||||
tu6_emit_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_disable_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
struct tu_image *image);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
|
||||
struct tu_image *image,
|
||||
@@ -56,6 +58,7 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
|
||||
uint32_t rangeCount,
|
||||
const VkImageSubresourceRange *pRanges);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_lrz_begin_renderpass(struct tu_cmd_buffer *cmd);
|
||||
|
||||
@@ -65,12 +68,15 @@ tu_lrz_begin_resumed_renderpass(struct tu_cmd_buffer *cmd);
|
||||
void
|
||||
tu_lrz_begin_secondary_cmdbuf(struct tu_cmd_buffer *cmd);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_lrz_tiling_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_lrz_sysmem_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
|
||||
|
||||
|
Reference in New Issue
Block a user