tu: Disable LRZ properly on A7XX

LRZ wasn't entirely disabled because the register `A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO`
was not being set to `0` in all circumstances. This register affects rendering even
when LRZ is disabled, so it needs to be set to `0` until LRZ is properly implemented.

Signed-off-by: Mark Collins <mark@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26461>
This commit is contained in:
Mark Collins
2024-01-24 07:25:45 +00:00
committed by Marge Bot
parent 3188c1b5c7
commit 265eb463b5
5 changed files with 51 additions and 24 deletions

View File

@@ -2340,7 +2340,7 @@ to upconvert to 32b float internally?
<!-- A bit tentative but it's a color and it is followed by LRZ_CLEAR -->
<reg32 offset="0x8111" name="GRAS_LRZ_CLEAR_DEPTH_F32" type="float" variants="A7XX-"/>
<reg32 offset="0x8113" name="GRAS_UNKNOWN_8113" variants="A7XX-" usage="rp_blit"/>
<reg32 offset="0x8113" name="GRAS_LRZ_DEPTH_BUFFER_INFO" variants="A7XX-" usage="rp_blit"/>
<!-- Always written together and always equal 09510840 00000a62 -->
<reg32 offset="0x8120" name="GRAS_UNKNOWN_8120" variants="A7XX-" usage="cmd"/>

View File

@@ -1317,6 +1317,7 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd,
r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
}
template <chip CHIP>
static void
r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer,
enum pipe_format src_format)
@@ -1338,6 +1339,9 @@ r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer,
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
tu_cs_image_flag_ref(cs, iview, layer);
if (CHIP >= A7XX)
tu_cs_emit_regs(cs, A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO(0));
/* Use color format from RB_MRT_BUF_INFO. This register is relevant for
* FMT6_NV12_Y.
*/
@@ -1657,7 +1661,7 @@ static const struct blit_ops r3d_ops = {
.clear_value = r3d_clear_value,
.src = r3d_src,
.src_buffer = r3d_src_buffer<CHIP>,
.dst = r3d_dst,
.dst = r3d_dst<CHIP>,
.dst_depth = r3d_dst_depth,
.dst_stencil = r3d_dst_stencil,
.dst_buffer = r3d_dst_buffer,
@@ -2015,7 +2019,7 @@ tu_CmdBlitImage2(VkCommandBuffer commandBuffer,
}
if (dst_image->lrz_height) {
tu_disable_lrz(cmd, &cmd->cs, dst_image);
tu_disable_lrz<CHIP>(cmd, &cmd->cs, dst_image);
}
}
TU_GENX(tu_CmdBlitImage2);
@@ -2133,7 +2137,7 @@ tu_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
pCopyBufferToImageInfo->pRegions + i);
if (dst_image->lrz_height) {
tu_disable_lrz(cmd, &cmd->cs, dst_image);
tu_disable_lrz<CHIP>(cmd, &cmd->cs, dst_image);
}
}
TU_GENX(tu_CmdCopyBufferToImage2);
@@ -2477,7 +2481,7 @@ tu_CmdCopyImage2(VkCommandBuffer commandBuffer,
}
if (dst_image->lrz_height) {
tu_disable_lrz(cmd, &cmd->cs, dst_image);
tu_disable_lrz<CHIP>(cmd, &cmd->cs, dst_image);
}
}
TU_GENX(tu_CmdCopyImage2);
@@ -2840,7 +2844,7 @@ tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
clear_image<CHIP>(cmd, image, (const VkClearValue*) pDepthStencil, range, range->aspectMask);
}
tu_lrz_clear_depth_image(cmd, image, pDepthStencil, rangeCount, pRanges);
tu_lrz_clear_depth_image<CHIP>(cmd, image, pDepthStencil, rangeCount, pRanges);
}
TU_GENX(tu_CmdClearDepthStencilImage);
@@ -3799,7 +3803,7 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
r3d_dst_stencil(cs, iview, layer);
}
} else {
r3d_dst(cs, &iview->view, layer, src_format);
r3d_dst<CHIP>(cs, &iview->view, layer, src_format);
}
r3d_src_gmem<CHIP>(cmd, cs, iview, src_format, dst_format, gmem_offset, cpp);

View File

@@ -1711,7 +1711,7 @@ tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
{
const struct tu_framebuffer *fb = cmd->state.framebuffer;
tu_lrz_sysmem_begin(cmd, cs);
tu_lrz_sysmem_begin<CHIP>(cmd, cs);
assert(fb->width > 0 && fb->height > 0);
tu6_emit_window_scissor(cs, 0, 0, fb->width - 1, fb->height - 1);
@@ -1730,9 +1730,7 @@ tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
tu_cs_emit_regs(cs,
A7XX_RB_UNKNOWN_8E06(cmd->device->physical_device->info->a6xx.magic.RB_UNKNOWN_8E06));
/* These three have something to do with lrz/depth */
tu_cs_emit_regs(cs, A7XX_GRAS_UNKNOWN_8007(0x0));
tu_cs_emit_regs(cs, A7XX_GRAS_UNKNOWN_8113(0x4));
tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8110(0x2));
tu_cs_emit_regs(cs, A7XX_RB_UNKNOWN_8E09(0x4));
@@ -1787,7 +1785,7 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
{
struct tu_physical_device *phys_dev = cmd->device->physical_device;
const struct tu_tiling_config *tiling = cmd->state.tiling;
tu_lrz_tiling_begin(cmd, cs);
tu_lrz_tiling_begin<CHIP>(cmd, cs);
tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
tu_cs_emit(cs, 0x0);
@@ -1799,8 +1797,6 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
A7XX_RB_UNKNOWN_8E06(0x0));
tu_cs_emit_regs(cs, A7XX_GRAS_UNKNOWN_8007(0x0));
tu_cs_emit_regs(cs, A7XX_GRAS_UNKNOWN_810B(0x0));
tu_cs_emit_regs(cs, A7XX_GRAS_UNKNOWN_8113(0x0));
tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8110(0x2));
tu_cs_emit_regs(cs, A7XX_RB_UNKNOWN_8E09(0x4));
@@ -1923,7 +1919,7 @@ tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
tu_cs_emit_call(cs, &cmd->draw_epilogue_cs);
tu_lrz_tiling_end(cmd, cs);
tu_lrz_tiling_end<CHIP>(cmd, cs);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_BLIT_CACHE);
@@ -4120,7 +4116,7 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
if (pass->subpasses[0].feedback_invalidate)
cmd->state.renderpass_cache.flush_bits |= TU_CMD_FLAG_CACHE_INVALIDATE;
tu_lrz_begin_renderpass(cmd);
tu_lrz_begin_renderpass<CHIP>(cmd);
cmd->trace_renderpass_start = u_trace_end_iterator(&cmd->trace);
@@ -4237,7 +4233,7 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer,
if (resuming)
tu_lrz_begin_resumed_renderpass(cmd);
else
tu_lrz_begin_renderpass(cmd);
tu_lrz_begin_renderpass<CHIP>(cmd);
}

View File

@@ -51,6 +51,7 @@
* before using LRZ.
*/
template <chip CHIP>
static void
tu6_emit_lrz_buffer(struct tu_cs *cs, struct tu_image *depth_image)
{
@@ -59,6 +60,10 @@ tu6_emit_lrz_buffer(struct tu_cs *cs, struct tu_image *depth_image)
A6XX_GRAS_LRZ_BUFFER_BASE(0),
A6XX_GRAS_LRZ_BUFFER_PITCH(0),
A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(0));
if (CHIP >= A7XX)
tu_cs_emit_regs(cs, A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO(0));
return;
}
@@ -71,6 +76,10 @@ tu6_emit_lrz_buffer(struct tu_cs *cs, struct tu_image *depth_image)
A6XX_GRAS_LRZ_BUFFER_BASE(.qword = lrz_iova),
A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = depth_image->lrz_pitch),
A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(.qword = lrz_fc_iova));
if (CHIP >= A7XX)
// TODO: Figure out the correct value to set here.
tu_cs_emit_regs(cs, A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO(0));
}
static void
@@ -215,6 +224,7 @@ tu_lrz_begin_resumed_renderpass(struct tu_cmd_buffer *cmd)
}
}
template <chip CHIP>
void
tu_lrz_begin_renderpass(struct tu_cmd_buffer *cmd)
{
@@ -238,7 +248,7 @@ tu_lrz_begin_renderpass(struct tu_cmd_buffer *cmd)
for (unsigned i = 0; i < pass->attachment_count; i++) {
struct tu_image *image = cmd->state.attachments[i]->image;
tu_disable_lrz(cmd, &cmd->cs, image);
tu_disable_lrz<CHIP>(cmd, &cmd->cs, image);
}
/* We need a valid LRZ fast-clear base, in case the render pass contents
@@ -254,9 +264,10 @@ tu_lrz_begin_renderpass(struct tu_cmd_buffer *cmd)
tu_lrz_begin_resumed_renderpass(cmd);
if (!cmd->state.lrz.valid) {
tu6_emit_lrz_buffer(&cmd->cs, NULL);
tu6_emit_lrz_buffer<CHIP>(&cmd->cs, NULL);
}
}
TU_GENX(tu_lrz_begin_renderpass);
void
tu_lrz_begin_secondary_cmdbuf(struct tu_cmd_buffer *cmd)
@@ -269,6 +280,7 @@ tu_lrz_begin_secondary_cmdbuf(struct tu_cmd_buffer *cmd)
}
}
template <chip CHIP>
void
tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
@@ -282,7 +294,7 @@ tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
struct tu_lrz_state *lrz = &cmd->state.lrz;
tu6_emit_lrz_buffer(cs, lrz->image_view->image);
tu6_emit_lrz_buffer<CHIP>(cs, lrz->image_view->image);
if (lrz->reuse_previous_state) {
/* Reuse previous LRZ state, LRZ cache is assumed to be
@@ -338,12 +350,14 @@ tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
}
}
}
TU_GENX(tu_lrz_tiling_begin);
template <chip CHIP>
void
tu_lrz_tiling_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
if (cmd->state.lrz.fast_clear || cmd->state.lrz.gpu_dir_tracking) {
tu6_emit_lrz_buffer(cs, cmd->state.lrz.image_view->image);
tu6_emit_lrz_buffer<CHIP>(cs, cmd->state.lrz.image_view->image);
if (cmd->state.lrz.gpu_dir_tracking) {
tu6_write_lrz_reg(cmd, &cmd->cs,
@@ -373,7 +387,9 @@ tu_lrz_tiling_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
* reason to do such clear.
*/
}
TU_GENX(tu_lrz_tiling_end);
template <chip CHIP>
void
tu_lrz_sysmem_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
@@ -387,12 +403,12 @@ tu_lrz_sysmem_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
struct tu_lrz_state *lrz = &cmd->state.lrz;
if (cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking) {
tu_disable_lrz(cmd, cs, lrz->image_view->image);
tu_disable_lrz<CHIP>(cmd, cs, lrz->image_view->image);
/* Make sure depth view comparison will fail. */
tu6_write_lrz_reg(cmd, cs,
A6XX_GRAS_LRZ_DEPTH_VIEW(.dword = 0));
} else {
tu6_emit_lrz_buffer(cs, lrz->image_view->image);
tu6_emit_lrz_buffer<CHIP>(cs, lrz->image_view->image);
/* Even though we disable LRZ writes in sysmem mode - there is still
* LRZ test, so LRZ should be cleared.
*/
@@ -408,6 +424,7 @@ tu_lrz_sysmem_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
}
}
}
TU_GENX(tu_lrz_sysmem_begin);
void
tu_lrz_sysmem_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
@@ -416,6 +433,7 @@ tu_lrz_sysmem_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
}
/* Disable LRZ outside of renderpass. */
template <chip CHIP>
void
tu_disable_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
struct tu_image *image)
@@ -426,11 +444,13 @@ tu_disable_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
if (!image->lrz_height)
return;
tu6_emit_lrz_buffer(cs, image);
tu6_emit_lrz_buffer<CHIP>(cs, image);
tu6_disable_lrz_via_depth_view(cmd, cs);
}
TU_GENX(tu_disable_lrz);
/* Clear LRZ, used for out of renderpass depth clears. */
template <chip CHIP>
void
tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
struct tu_image *image,
@@ -460,7 +480,7 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
bool fast_clear = image->lrz_fc_size && (pDepthStencil->depth == 0.f ||
pDepthStencil->depth == 1.f);
tu6_emit_lrz_buffer(&cmd->cs, image);
tu6_emit_lrz_buffer<CHIP>(&cmd->cs, image);
tu6_write_lrz_reg(cmd, &cmd->cs, A6XX_GRAS_LRZ_DEPTH_VIEW(
.base_layer = range->baseArrayLayer,
@@ -481,6 +501,7 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
tu6_clear_lrz<A6XX>(cmd, &cmd->cs, image, (const VkClearValue*) pDepthStencil);
}
}
TU_GENX(tu_lrz_clear_depth_image);
void
tu_lrz_disable_during_renderpass(struct tu_cmd_buffer *cmd)

View File

@@ -45,10 +45,12 @@ struct tu_lrz_state
void
tu6_emit_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
template <chip CHIP>
void
tu_disable_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
struct tu_image *image);
template <chip CHIP>
void
tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
struct tu_image *image,
@@ -56,6 +58,7 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
uint32_t rangeCount,
const VkImageSubresourceRange *pRanges);
template <chip CHIP>
void
tu_lrz_begin_renderpass(struct tu_cmd_buffer *cmd);
@@ -65,12 +68,15 @@ tu_lrz_begin_resumed_renderpass(struct tu_cmd_buffer *cmd);
void
tu_lrz_begin_secondary_cmdbuf(struct tu_cmd_buffer *cmd);
template <chip CHIP>
void
tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
template <chip CHIP>
void
tu_lrz_tiling_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
template <chip CHIP>
void
tu_lrz_sysmem_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs);