diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index 46b53429294..d781344be45 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -2710,6 +2710,9 @@ static void
 tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                               struct tu_cs *cs,
                               uint32_t attachment,
+                              uint32_t base_layer,
+                              uint32_t layers,
+                              uint32_t layer_mask,
                               VkImageAspectFlags mask,
                               const VkClearValue *value)
 {
@@ -2722,14 +2725,21 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                    A6XX_RB_BLIT_GMEM_MSAA_CNTL(tu_msaa_samples(att->samples)));
 
    enum pipe_format format = tu_vk_format_to_pipe_format(att->format);
-   if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
-      if (mask & VK_IMAGE_ASPECT_DEPTH_BIT)
-         clear_gmem_attachment(cmd, cs, PIPE_FORMAT_Z32_FLOAT, 0xf, tu_attachment_gmem_offset(cmd, att), value);
-      if (mask & VK_IMAGE_ASPECT_STENCIL_BIT)
-         clear_gmem_attachment(cmd, cs, PIPE_FORMAT_S8_UINT, 0xf, tu_attachment_gmem_offset_stencil(cmd, att), value);
-   } else {
-      clear_gmem_attachment(cmd, cs, format, aspect_write_mask(format, mask),
-                            tu_attachment_gmem_offset(cmd, att), value);
+   for_each_layer(i, layer_mask, layers) {
+      uint32_t layer = i + base_layer;
+      if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+         if (mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+            clear_gmem_attachment(cmd, cs, PIPE_FORMAT_Z32_FLOAT, 0xf,
+                                  tu_attachment_gmem_offset(cmd, att, layer), value);
+         }
+         if (mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
+            clear_gmem_attachment(cmd, cs, PIPE_FORMAT_S8_UINT, 0xf,
+                                  tu_attachment_gmem_offset_stencil(cmd, att, layer), value);
+         }
+      } else {
+         clear_gmem_attachment(cmd, cs, format, aspect_write_mask(format, mask),
+                               tu_attachment_gmem_offset(cmd, att, layer), value);
+      }
    }
 
    trace_end_gmem_clear(&cmd->trace, cs, att->format, att->samples);
@@ -2768,7 +2778,10 @@ tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
       if (a == VK_ATTACHMENT_UNUSED)
          continue;
 
-      tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask,
+      tu_emit_clear_gmem_attachment(cmd, cs, a, rects[i].baseArrayLayer,
+                                    rects[i].layerCount,
+                                    subpass->multiview_mask,
+                                    attachments[j].aspectMask,
                                     &attachments[j].clearValue);
    }
 }
@@ -2946,7 +2959,9 @@ tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
    if (!attachment->clear_mask)
       return;
 
-   tu_emit_clear_gmem_attachment(cmd, cs, a, attachment->clear_mask, value);
+   tu_emit_clear_gmem_attachment(cmd, cs, a, 0, cmd->state.framebuffer->layers,
+                                 attachment->clear_views,
+                                 attachment->clear_mask, value);
 }
 
 static void
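
For reference: the new loops rely on for_each_layer, defined elsewhere in the
driver. A plausible shape for it is sketched below (hypothetical body, not the
exact turnip definition): with a non-zero layer_mask, i.e. the multiview case,
it visits only the set bits of the mask; with a zero mask it visits all
layer_count layers.

   #include <stdint.h>

   /* Hypothetical sketch of for_each_layer(); the driver's real macro may
    * differ. 32 - __builtin_clz(mask) is the highest set bit index plus one,
    * so iteration stops after the last rendered view. */
   #define for_each_layer(layer, layer_mask, layer_count)                  \
      for (uint32_t layer = 0;                                             \
           layer < ((layer_mask) ? 32 - __builtin_clz(layer_mask)          \
                                 : (layer_count));                         \
           layer++)                                                        \
         if (!(layer_mask) || ((layer_mask) & (1u << layer)))

For example, a multiview mask of 0b101 visits layers 0 and 2 only, which is
why the clear and store paths take both a layer count and a mask.
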
@@ -2966,37 +2981,39 @@ tu_emit_blit(struct tu_cmd_buffer *cmd,
                   .sample_0 = vk_format_is_int(attachment->format) ||
                               vk_format_is_depth_or_stencil(attachment->format)));
 
-   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
-   if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
-      if (!separate_stencil) {
-         tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO));
-         tu_cs_emit_qw(cs, iview->depth_base_addr);
-         tu_cs_emit(cs, iview->depth_PITCH);
+   for_each_layer(i, attachment->clear_views, cmd->state.framebuffer->layers) {
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
+      if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+         if (!separate_stencil) {
+            tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO));
+            tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * i);
+            tu_cs_emit(cs, iview->depth_PITCH);
+
+            tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
+            tu_cs_image_flag_ref(cs, &iview->view, i);
+         } else {
+            tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
+            tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * i);
+            tu_cs_emit(cs, iview->stencil_PITCH);
+         }
+      } else {
+         tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO);
+         tu_cs_image_ref_2d(cs, &iview->view, i, false);
 
          tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
-         tu_cs_image_flag_ref(cs, &iview->view, 0);
-      } else {
-         tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
-         tu_cs_emit_qw(cs, iview->stencil_base_addr);
-         tu_cs_emit(cs, iview->stencil_PITCH);
+         tu_cs_image_flag_ref(cs, &iview->view, i);
       }
-   } else {
-      tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO);
-      tu_cs_image_ref_2d(cs, &iview->view, 0, false);
-      tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
-      tu_cs_image_flag_ref(cs, &iview->view, 0);
-   }
-
-   if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && separate_stencil) {
+      if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && separate_stencil) {
+         tu_cs_emit_regs(cs,
+                         A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset_stencil(cmd, attachment, i)));
+      } else {
          tu_cs_emit_regs(cs,
-                        A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset_stencil(cmd, attachment)));
-   } else {
-      tu_cs_emit_regs(cs,
-                      A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset(cmd, attachment)));
-   }
+                         A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset(cmd, attachment, i)));
+      }
 
-   tu6_emit_event_write(cmd, cs, BLIT);
+      tu6_emit_event_write(cmd, cs, BLIT);
+   }
 }
 
 static bool
@@ -3132,6 +3149,7 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
               bool separate_stencil,
               enum pipe_format src_format,
               enum pipe_format dst_format,
+              uint32_t layer,
               uint32_t gmem_offset,
               uint32_t cpp)
 {
@@ -3140,12 +3158,12 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
 
    if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
       if (!separate_stencil) {
-         r2d_dst_depth(cs, iview, 0);
+         r2d_dst_depth(cs, iview, layer);
       } else {
-         r2d_dst_stencil(cs, iview, 0);
+         r2d_dst_stencil(cs, iview, layer);
       }
    } else {
-      r2d_dst(cs, &iview->view, 0, src_format);
+      r2d_dst(cs, &iview->view, layer, src_format);
    }
 
    enum a6xx_format fmt = tu6_format_texture(src_format, TILE6_2).fmt;
@@ -3192,6 +3210,7 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
               enum pipe_format src_format,
               enum pipe_format dst_format,
               const VkRect2D *render_area,
+              uint32_t layer,
               uint32_t gmem_offset,
               uint32_t cpp)
 {
@@ -3213,12 +3232,12 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
 
    if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
       if (!separate_stencil) {
-         r3d_dst_depth(cs, iview, 0);
+         r3d_dst_depth(cs, iview, layer);
       } else {
-         r3d_dst_stencil(cs, iview, 0);
+         r3d_dst_stencil(cs, iview, layer);
      }
    } else {
-      r3d_dst(cs, &iview->view, 0, src_format);
+      r3d_dst(cs, &iview->view, layer, src_format);
    }
 
    r3d_src_gmem(cmd, cs, iview, src_format, dst_format, gmem_offset, cpp);
@@ -3312,6 +3331,8 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                          struct tu_cs *cs,
                          uint32_t a,
                          uint32_t gmem_a,
+                         uint32_t layers,
+                         uint32_t layer_mask,
                          bool cond_exec_allowed)
 {
    const VkRect2D *render_area = &cmd->state.render_area;
@@ -3389,25 +3410,29 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
       if (store_common || store_separate_stencil)
          tu_disable_draw_states(cmd, cs);
 
-      if (store_common) {
-         store_3d_blit(cmd, cs, iview, dst->samples, false, src_format,
-                       dst_format, render_area, tu_attachment_gmem_offset(cmd, src), src->cpp);
-      }
-      if (store_separate_stencil) {
-         store_3d_blit(cmd, cs, iview, dst->samples, true, PIPE_FORMAT_S8_UINT,
-                       PIPE_FORMAT_S8_UINT, render_area,
-                       tu_attachment_gmem_offset_stencil(cmd, src), src->samples);
+      for_each_layer(i, layer_mask, layers) {
+         if (store_common) {
+            store_3d_blit(cmd, cs, iview, dst->samples, false, src_format,
+                          dst_format, render_area, i, tu_attachment_gmem_offset(cmd, src, i), src->cpp);
+         }
+         if (store_separate_stencil) {
+            store_3d_blit(cmd, cs, iview, dst->samples, true, PIPE_FORMAT_S8_UINT,
+                          PIPE_FORMAT_S8_UINT, render_area, i,
+                          tu_attachment_gmem_offset_stencil(cmd, src, i), src->samples);
+         }
       }
    } else {
       r2d_coords(cs, &render_area->offset, &render_area->offset,
                  &render_area->extent);
 
-      if (store_common) {
-         store_cp_blit(cmd, cs, iview, src->samples, false, src_format,
-                       dst_format, tu_attachment_gmem_offset(cmd, src), src->cpp);
-      }
-      if (store_separate_stencil) {
-         store_cp_blit(cmd, cs, iview, src->samples, true, PIPE_FORMAT_S8_UINT,
-                       PIPE_FORMAT_S8_UINT, tu_attachment_gmem_offset_stencil(cmd, src), src->samples);
+      for_each_layer(i, layer_mask, layers) {
+         if (store_common) {
+            store_cp_blit(cmd, cs, iview, src->samples, false, src_format,
+                          dst_format, i, tu_attachment_gmem_offset(cmd, src, i), src->cpp);
+         }
+         if (store_separate_stencil) {
+            store_cp_blit(cmd, cs, iview, src->samples, true, PIPE_FORMAT_S8_UINT,
+                          PIPE_FORMAT_S8_UINT, i, tu_attachment_gmem_offset_stencil(cmd, src, i), src->samples);
+         }
       }
    }
diff --git a/src/freedreno/vulkan/tu_clear_blit.h b/src/freedreno/vulkan/tu_clear_blit.h
index 77a289afa43..bab3a15c685 100644
--- a/src/freedreno/vulkan/tu_clear_blit.h
+++ b/src/freedreno/vulkan/tu_clear_blit.h
@@ -56,6 +56,8 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                          struct tu_cs *cs,
                          uint32_t a,
                          uint32_t gmem_a,
+                         uint32_t layers,
+                         uint32_t layer_mask,
                          bool cond_exec_allowed);
 
 void
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index dc5a026b06e..885a14e980d 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -283,7 +283,7 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
          tu_cs_image_depth_ref(cs, iview, 0);
       else
          tu_cs_image_ref(cs, &iview->view, 0);
-      tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment));
+      tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment, 0));
 
       tu_cs_emit_regs(cs,
                       A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));
@@ -298,10 +298,10 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
       tu_cs_emit(cs, A6XX_RB_STENCIL_INFO(.separate_stencil = true).value);
       if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
          tu_cs_image_stencil_ref(cs, iview, 0);
-         tu_cs_emit(cs, tu_attachment_gmem_offset_stencil(cmd, attachment));
+         tu_cs_emit(cs, tu_attachment_gmem_offset_stencil(cmd, attachment, 0));
       } else {
          tu_cs_image_ref(cs, &iview->view, 0);
-         tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment));
+         tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment, 0));
       }
    } else {
       tu_cs_emit_regs(cs,
@@ -347,7 +347,7 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd,
       tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6);
       tu_cs_emit(cs, iview->view.RB_MRT_BUF_INFO);
       tu_cs_image_ref(cs, &iview->view, 0);
-      tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, &cmd->state.pass->attachments[a]));
+      tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, &cmd->state.pass->attachments[a], 0));
 
       tu_cs_emit_regs(cs,
                       A6XX_SP_FS_MRT_REG(i, .dword = iview->view.SP_FS_MRT_REG));
@@ -685,7 +685,8 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd,
       return true;
 
    /* can't fit attachments into gmem */
-   if (!cmd->state.pass->gmem_pixels[cmd->state.gmem_layout])
+   if (!cmd->state.pass->gmem_pixels[cmd->state.gmem_layout] ||
+       !cmd->state.tiling->possible)
       return true;
 
    if (cmd->state.framebuffer->layers > 1)
@@ -863,6 +864,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
    const struct tu_render_pass *pass = cmd->state.pass;
    const struct tu_subpass *subpass = &pass->subpasses[pass->subpass_count-1];
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
 
    tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
    tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
@@ -870,8 +872,11 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    tu6_emit_blit_scissor(cmd, cs, true);
 
    for (uint32_t a = 0; a < pass->attachment_count; ++a) {
-      if (pass->attachments[a].gmem)
-         tu_store_gmem_attachment(cmd, cs, a, a, cmd->state.tiling->binning_possible);
+      if (pass->attachments[a].gmem) {
+         tu_store_gmem_attachment(cmd, cs, a, a,
+                                  fb->layers, subpass->multiview_mask,
+                                  cmd->state.tiling->binning_possible);
+      }
    }
 
    if (subpass->resolve_attachments) {
@@ -879,7 +884,8 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
       uint32_t a = subpass->resolve_attachments[i].attachment;
       if (a != VK_ATTACHMENT_UNUSED) {
          uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i);
-         tu_store_gmem_attachment(cmd, cs, a, gmem_a, false);
+         tu_store_gmem_attachment(cmd, cs, a, gmem_a, fb->layers,
+                                  subpass->multiview_mask, false);
       }
    }
 }
@@ -1195,7 +1201,7 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
       const struct tu_render_pass_attachment *att =
          &cmd->state.pass->attachments[a];
       uint32_t *dst = &texture.map[A6XX_TEX_CONST_DWORDS * i];
-      uint32_t gmem_offset = tu_attachment_gmem_offset(cmd, att);
+      uint32_t gmem_offset = tu_attachment_gmem_offset(cmd, att, 0);
       uint32_t cpp = att->cpp;
 
       memcpy(dst, iview->view.descriptor, A6XX_TEX_CONST_DWORDS * 4);
@@ -1265,6 +1271,9 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
          dst[2] =
            A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
            A6XX_TEX_CONST_2_PITCH(tiling->tile0.width * cpp);
+         /* Note: it seems the HW implicitly calculates the array pitch with the
+          * GMEM tiling, so we don't need to specify the pitch ourselves.
+          */
         dst[3] = 0;
         dst[4] = cmd->device->physical_device->gmem_base + gmem_offset;
         dst[5] = A6XX_TEX_CONST_5_DEPTH(1);
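
The note on the implicit array pitch can be made concrete with assumed
numbers: the descriptor pitch programmed above is one tile row of bytes, and
the per-layer stride the hardware appears to derive is one whole tile, which
is exactly what tu_attachment_gmem_offset() adds per layer.

   #include <assert.h>
   #include <stdint.h>

   /* Illustration only; the tile dimensions are made up. */
   static void example_gmem_pitches(void)
   {
      uint32_t tile_w = 96, tile_h = 64, cpp = 4;    /* assumed values */
      uint32_t pitch = tile_w * cpp;                 /* 384 bytes per row */
      uint32_t array_pitch = tile_w * tile_h * cpp;  /* 24576 bytes per layer */
      assert(array_pitch == pitch * tile_h);
   }
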
@@ -4378,6 +4387,7 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
    }
 
    const struct tu_render_pass *pass = cmd->state.pass;
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
    struct tu_cs *cs = &cmd->draw_cs;
    const struct tu_subpass *last_subpass = cmd->state.subpass;
 
@@ -4405,7 +4415,8 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
          uint32_t gmem_a =
            tu_subpass_get_attachment_to_resolve(subpass, i);
 
-         tu_store_gmem_attachment(cmd, cs, a, gmem_a, false);
+         tu_store_gmem_attachment(cmd, cs, a, gmem_a, fb->layers,
+                                  subpass->multiview_mask, false);
 
          if (!pass->attachments[a].gmem)
            continue;
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h
index 9b89b71baac..7164b901305 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.h
+++ b/src/freedreno/vulkan/tu_cmd_buffer.h
@@ -609,18 +609,23 @@ extern const struct vk_command_buffer_ops tu_cmd_buffer_ops;
 
 static inline uint32_t
 tu_attachment_gmem_offset(struct tu_cmd_buffer *cmd,
-                          const struct tu_render_pass_attachment *att)
+                          const struct tu_render_pass_attachment *att,
+                          uint32_t layer)
 {
    assert(cmd->state.gmem_layout < TU_GMEM_LAYOUT_COUNT);
-   return att->gmem_offset[cmd->state.gmem_layout];
+   return att->gmem_offset[cmd->state.gmem_layout] +
+      layer * cmd->state.tiling->tile0.width * cmd->state.tiling->tile0.height *
+      att->cpp;
 }
 
 static inline uint32_t
 tu_attachment_gmem_offset_stencil(struct tu_cmd_buffer *cmd,
-                                  const struct tu_render_pass_attachment *att)
+                                  const struct tu_render_pass_attachment *att,
+                                  uint32_t layer)
 {
    assert(cmd->state.gmem_layout < TU_GMEM_LAYOUT_COUNT);
-   return att->gmem_offset_stencil[cmd->state.gmem_layout];
+   return att->gmem_offset_stencil[cmd->state.gmem_layout] +
+      layer * cmd->state.tiling->tile0.width * cmd->state.tiling->tile0.height;
 }
 
 void tu_render_pass_state_merge(struct tu_render_pass_state *dst,
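
Restated outside the driver types, the helpers above give each layer one
tile's worth of GMEM per attachment. A minimal standalone sketch with
hypothetical names:

   #include <stdint.h>

   /* Per-layer GMEM offset: the stride is tile0.width * tile0.height * cpp.
    * The separate-stencil variant drops cpp, S8 data apparently taking one
    * byte per pixel in this layout. */
   static uint32_t gmem_offset_for_layer(uint32_t base, uint32_t tile_w,
                                         uint32_t tile_h, uint32_t cpp,
                                         uint32_t layer)
   {
      return base + layer * tile_w * tile_h * cpp;
   }

For example, with a 32x32 tile and cpp = 4, layer 1 starts 4096 bytes after
layer 0.
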
diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h
index ff266931ca8..1b6399175c8 100644
--- a/src/freedreno/vulkan/tu_device.h
+++ b/src/freedreno/vulkan/tu_device.h
@@ -380,6 +380,9 @@ struct tu_tiling_config {
    /* number of VSC pipes */
    VkExtent2D pipe_count;
 
+   /* Whether using GMEM is even possible with this configuration */
+   bool possible;
+
    /* Whether binning should be used for gmem rendering using this
     * framebuffer. */
    bool binning;
diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c
index 84c1c306129..a9517c75f80 100644
--- a/src/freedreno/vulkan/tu_pass.c
+++ b/src/freedreno/vulkan/tu_pass.c
@@ -550,20 +550,6 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass,
 {
    for (enum tu_gmem_layout layout = 0; layout < TU_GMEM_LAYOUT_COUNT;
         layout++) {
-      /* From the VK_KHR_multiview spec:
-       *
-       *    Multiview is all-or-nothing for a render pass - that is, either all
-       *    subpasses must have a non-zero view mask (though some subpasses may
-       *    have only one view) or all must be zero.
-       *
-       * This means we only have to check one of the view masks.
-       */
-      if (pass->subpasses[0].multiview_mask) {
-         /* It seems multiview must use sysmem rendering. */
-         pass->gmem_pixels[layout] = 0;
-         continue;
-      }
-
      /* log2(gmem_align/(tile_align_w*tile_align_h)) */
      uint32_t block_align_shift = 3;
      uint32_t tile_align_w = phys_dev->info->tile_align_w;
@@ -572,14 +558,17 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass,
 
      /* calculate total bytes per pixel */
      uint32_t cpp_total = 0;
+      uint32_t min_cpp = UINT32_MAX;
      for (uint32_t i = 0; i < pass->attachment_count; i++) {
         struct tu_render_pass_attachment *att = &pass->attachments[i];
         bool cpp1 = (att->cpp == 1);
         if (att->gmem) {
            cpp_total += att->cpp;
+            min_cpp = MIN2(min_cpp, att->cpp);
 
            /* take into account the separate stencil: */
            if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+               min_cpp = MIN2(min_cpp, att->samples);
               cpp1 = (att->samples == 1);
               cpp_total += att->samples;
            }
@@ -596,6 +585,7 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass,
      }
 
      pass->tile_align_w = tile_align_w;
+      pass->min_cpp = min_cpp;
 
      /* no gmem attachments */
      if (cpp_total == 0) {
diff --git a/src/freedreno/vulkan/tu_pass.h b/src/freedreno/vulkan/tu_pass.h
index 06f1185a115..a43288ef4b1 100644
--- a/src/freedreno/vulkan/tu_pass.h
+++ b/src/freedreno/vulkan/tu_pass.h
@@ -100,6 +100,7 @@ struct tu_render_pass
    uint32_t subpass_count;
    uint32_t gmem_pixels[TU_GMEM_LAYOUT_COUNT];
    uint32_t tile_align_w;
+   uint32_t min_cpp;
    uint64_t autotune_hash;
 
    /* memory bandwidth costs (in bytes) for gmem / sysmem rendering */
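
Why the minimum cpp is the interesting one: the attachment with the smallest
cpp has the smallest, and therefore least aligned, per-layer stride, and
attachment cpps are powers of two in practice, so making that one stride a
multiple of the GMEM alignment makes every other stride a multiple as well.
A small check under those assumptions:

   #include <assert.h>
   #include <stdint.h>

   /* Assumed values: 64x16 alignment tile, min_cpp = 4 (e.g. RGBA8). */
   static void example_min_cpp(void)
   {
      uint32_t tile_align_w = 64, tile_align_h = 16, min_cpp = 4;
      uint32_t min_stride = tile_align_w * tile_align_h * min_cpp; /* 4096 */
      assert(min_stride % 4096 == 0);
      /* A larger power-of-two cpp scales the stride by an integer factor,
       * so it stays 4 KiB aligned too. */
      assert((tile_align_w * tile_align_h * 8) % 4096 == 0);
   }
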
diff --git a/src/freedreno/vulkan/tu_util.c b/src/freedreno/vulkan/tu_util.c
index 9b0b9a42068..537bf6e0e98 100644
--- a/src/freedreno/vulkan/tu_util.c
+++ b/src/freedreno/vulkan/tu_util.c
@@ -67,11 +67,49 @@ tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
                                     enum tu_gmem_layout gmem_layout)
 {
    const uint32_t tile_align_w = pass->tile_align_w;
-   const uint32_t tile_align_h = dev->physical_device->info->tile_align_h;
+   uint32_t tile_align_h = dev->physical_device->info->tile_align_h;
    const uint32_t max_tile_width = dev->physical_device->info->tile_max_w;
    const uint32_t max_tile_height = dev->physical_device->info->tile_max_h;
    struct tu_tiling_config *tiling = &fb->tiling[gmem_layout];
 
+   /* From the Vulkan 1.3.232 spec, under VkFramebufferCreateInfo:
+    *
+    *    If the render pass uses multiview, then layers must be one and each
+    *    attachment requires a number of layers that is greater than the
+    *    maximum bit index set in the view mask in the subpasses in which it
+    *    is used.
+    */
+   uint32_t layers = fb->layers;
+   if (pass->subpasses[0].multiview_mask) {
+      uint32_t view_mask = 0;
+      for (unsigned i = 0; i < pass->subpass_count; i++)
+         view_mask |= pass->subpasses[i].multiview_mask;
+      layers = util_logbase2(view_mask) + 1;
+   }
+
+   /* If there is more than one layer, we need to make sure that the layer
+    * stride is expressible as an offset in RB_BLIT_BASE_GMEM which ignores
+    * the low 12 bits. The layer stride seems to be implicitly calculated from
+    * the tile width and height so we need to adjust one of them.
+    */
+   const uint32_t gmem_align_log2 = 12;
+   const uint32_t gmem_align = 1 << gmem_align_log2;
+   uint32_t min_layer_stride = tile_align_h * tile_align_w * pass->min_cpp;
+   if (layers > 1 && align(min_layer_stride, gmem_align) != min_layer_stride) {
+      /* Make sure that min_layer_stride is a multiple of gmem_align. Because
+       * gmem_align is a power of two and min_layer_stride isn't already a
+       * multiple of gmem_align, this is equivalent to shifting tile_align_h
+       * until the number of 0 bits at the bottom of min_layer_stride is at
+       * least gmem_align_log2.
+       */
+      tile_align_h <<= gmem_align_log2 - (ffs(min_layer_stride) - 1);
+
+      /* Check that we did the math right. */
+      min_layer_stride = tile_align_h * tile_align_w * pass->min_cpp;
+      assert(align(min_layer_stride, gmem_align) == min_layer_stride);
+   }
+
    /* start from 1 tile */
    tiling->tile_count = (VkExtent2D) {
       .width = 1,
@@ -110,16 +148,23 @@ tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
         util_align_npot(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h);
    }
 
+   tiling->possible = true;
+
    /* do not exceed gmem size */
-   while (tiling->tile0.width * tiling->tile0.height > pass->gmem_pixels[gmem_layout]) {
+   while (tiling->tile0.width * tiling->tile0.height * layers > pass->gmem_pixels[gmem_layout]) {
      if (tiling->tile0.width > MAX2(tile_align_w, tiling->tile0.height)) {
         tiling->tile_count.width++;
         tiling->tile0.width =
            util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w);
      } else {
-         /* if this assert fails then layout is impossible.. */
-         assert(tiling->tile0.height > tile_align_h);
         tiling->tile_count.height++;
+         if (DIV_ROUND_UP(fb->height, tiling->tile_count.height) < tile_align_h) {
+            /* Tiling is impossible. This may happen when there is more than
+             * one layer.
+             */
+            tiling->possible = false;
+            return;
+         }
         tiling->tile0.height =
            align(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h);
      }
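
A worked example of the tile_align_h adjustment, with assumed values:
tile_align_w = 32, tile_align_h = 16 and min_cpp = 2 give min_layer_stride =
1024 = 1 << 10, so tile_align_h has to grow by 12 - 10 = 2 bits before the
layer stride is expressible in RB_BLIT_BASE_GMEM's 4 KiB granularity.

   #include <assert.h>
   #include <stdint.h>
   #include <strings.h> /* ffs() */

   static void example_layer_stride_alignment(void)
   {
      const uint32_t gmem_align_log2 = 12;        /* 4 KiB */
      uint32_t tile_align_w = 32, tile_align_h = 16, min_cpp = 2;
      uint32_t min_layer_stride =
         tile_align_h * tile_align_w * min_cpp;   /* 1024 = 1 << 10 */
      /* The same shift tu_tiling_config_update_tile_layout() performs. */
      tile_align_h <<= gmem_align_log2 - (ffs(min_layer_stride) - 1);
      assert(tile_align_h == 64);
      assert(tile_align_h * tile_align_w * min_cpp == 4096);
   }
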