radv: Calculate VRAM tess patch size independently of LDS size.

We recently made some effort to reduce the LDS use of TCS:
The lowering no longer uses the same output location mapping when
storing TCS outputs to LDS and VRAM. This means that the same
patch will use a different amount of LDS and VRAM.

Therefore, we need to properly calculate the patch size in VRAM
when determining the number of output patches.

Fixes: 0e481a4adc
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28739>
This commit is contained in:
Timur Kristóf
2024-04-15 12:31:01 +02:00
committed by Marge Bot
parent 8190a65c78
commit 2d9e38dbe5
5 changed files with 47 additions and 8 deletions

View File

@@ -471,6 +471,23 @@ radv_gather_unlinked_io_mask(const uint64_t nir_io_mask)
return radv_io_mask;
}
uint64_t
radv_gather_unlinked_patch_io_mask(const uint64_t nir_io_mask, const uint32_t nir_patch_io_mask)
{
uint64_t radv_io_mask = 0;
u_foreach_bit64 (semantic, nir_patch_io_mask) {
radv_io_mask |= BITFIELD64_BIT(radv_map_io_driver_location(semantic + VARYING_SLOT_PATCH0));
}
/* Tess levels need to be handled separately because they are not part of patch_outputs_written. */
if (nir_io_mask & VARYING_BIT_TESS_LEVEL_OUTER)
radv_io_mask |= BITFIELD64_BIT(radv_map_io_driver_location(VARYING_SLOT_TESS_LEVEL_OUTER));
if (nir_io_mask & VARYING_BIT_TESS_LEVEL_INNER)
radv_io_mask |= BITFIELD64_BIT(radv_map_io_driver_location(VARYING_SLOT_TESS_LEVEL_INNER));
return radv_io_mask;
}
static void
gather_shader_info_vs(struct radv_device *device, const nir_shader *nir,
const struct radv_graphics_state_key *gfx_state, const struct radv_shader_stage_key *stage_key,
@@ -538,16 +555,20 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
if (!info->inputs_linked)
info->tcs.num_linked_inputs = util_last_bit64(radv_gather_unlinked_io_mask(nir->info.inputs_read));
if (!info->outputs_linked)
if (!info->outputs_linked) {
info->tcs.num_linked_outputs = util_last_bit64(radv_gather_unlinked_io_mask(
nir->info.outputs_written & ~(VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER)));
info->tcs.num_linked_patch_outputs = util_last_bit64(
radv_gather_unlinked_patch_io_mask(nir->info.outputs_written, nir->info.patch_outputs_written));
}
if (gfx_state->ts.patch_control_points) {
/* Number of tessellation patches per workgroup processed by the current pipeline. */
info->num_tess_patches = get_tcs_num_patches(
gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs,
info->tcs.num_lds_per_vertex_outputs, info->tcs.num_lds_per_patch_outputs, pdev->hs.tess_offchip_block_dw_size,
pdev->info.gfx_level, pdev->info.family);
info->tcs.num_lds_per_vertex_outputs, info->tcs.num_lds_per_patch_outputs, info->tcs.num_linked_outputs,
info->tcs.num_linked_patch_outputs, pdev->hs.tess_offchip_block_dw_size, pdev->info.gfx_level,
pdev->info.family);
/* LDS size used by VS+TCS for storing TCS inputs and outputs. */
info->tcs.num_lds_blocks =