radv: Calculate VRAM tess patch size independently of LDS size.

We recently made some effort to reduce the LDS use of TCS: The lowering no longer uses the same output location mapping when storing TCS outputs to LDS and VRAM. This means that the same patch will use a different amount of LDS and VRAM. Therefore, we need to properly calculate the patch size in VRAM when determining the number of output patches.

Fixes: 0e481a4adc
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28739>
2024-04-15 12:31:01 +02:00
parent 8190a65c78
commit 2d9e38dbe5
5 changed files with 47 additions and 8 deletions
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -471,6 +471,23 @@ radv_gather_unlinked_io_mask(const uint64_t nir_io_mask)
   return radv_io_mask;
 }

+uint64_t
+radv_gather_unlinked_patch_io_mask(const uint64_t nir_io_mask, const uint32_t nir_patch_io_mask)
+{
+   /* Accumulates BITFIELD64_BIT() of the mapped driver location of every written per-patch output. */
+   uint64_t driver_mask = 0;
+
+   u_foreach_bit64 (patch_idx, nir_patch_io_mask)
+      driver_mask |= BITFIELD64_BIT(radv_map_io_driver_location(VARYING_SLOT_PATCH0 + patch_idx));
+
+   /* patch_outputs_written does not include the tess levels, so they are picked up from nir_io_mask. */
+   if (nir_io_mask & VARYING_BIT_TESS_LEVEL_OUTER)
+      driver_mask |= BITFIELD64_BIT(radv_map_io_driver_location(VARYING_SLOT_TESS_LEVEL_OUTER));
+   if (nir_io_mask & VARYING_BIT_TESS_LEVEL_INNER)
+      driver_mask |= BITFIELD64_BIT(radv_map_io_driver_location(VARYING_SLOT_TESS_LEVEL_INNER));
+   return driver_mask;
+}
+
 static void
 gather_shader_info_vs(struct radv_device *device, const nir_shader *nir,
                      const struct radv_graphics_state_key *gfx_state, const struct radv_shader_stage_key *stage_key,
@@ -538,16 +555,20 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,

   if (!info->inputs_linked)
      info->tcs.num_linked_inputs = util_last_bit64(radv_gather_unlinked_io_mask(nir->info.inputs_read));
-   if (!info->outputs_linked)
+   if (!info->outputs_linked) {
      info->tcs.num_linked_outputs = util_last_bit64(radv_gather_unlinked_io_mask(
         nir->info.outputs_written & ~(VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER)));
+      info->tcs.num_linked_patch_outputs = util_last_bit64(
+         radv_gather_unlinked_patch_io_mask(nir->info.outputs_written, nir->info.patch_outputs_written));
+   }

   if (gfx_state->ts.patch_control_points) {
      /* Number of tessellation patches per workgroup processed by the current pipeline. */
      info->num_tess_patches = get_tcs_num_patches(
         gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs,
-         info->tcs.num_lds_per_vertex_outputs, info->tcs.num_lds_per_patch_outputs, pdev->hs.tess_offchip_block_dw_size,
-         pdev->info.gfx_level, pdev->info.family);
+         info->tcs.num_lds_per_vertex_outputs, info->tcs.num_lds_per_patch_outputs, info->tcs.num_linked_outputs,
+         info->tcs.num_linked_patch_outputs, pdev->hs.tess_offchip_block_dw_size, pdev->info.gfx_level,
+         pdev->info.family);

      /* LDS size used by VS+TCS for storing TCS inputs and outputs. */
      info->tcs.num_lds_blocks =