radv, aco: Add uses_full_subgroups to compute shader info.

Allow the compiler to assume that the shader always has full subgroups,
meaning that the initial EXEC mask is -1 in all waves (all lanes enabled).
This assumption is incorrect for ray tracing and internal (meta) shaders
because they can use unaligned dispatch.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20670>
This commit is contained in:
Timur Kristóf
2023-01-10 12:58:52 +01:00
committed by Marge Bot
parent 22b350fa27
commit 39448c8e9c
6 changed files with 17 additions and 1 deletions

View File

@@ -117,6 +117,7 @@ struct aco_shader_info {
} ps;
struct {
uint8_t subgroup_size;
bool uses_full_subgroups;
} cs;
uint32_t gfx9_gs_ring_lds_size;

View File

@@ -87,6 +87,7 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info,
ASSIGN_FIELD(ps.num_interp);
ASSIGN_FIELD(ps.spi_ps_input);
ASSIGN_FIELD(cs.subgroup_size);
ASSIGN_FIELD(cs.uses_full_subgroups);
aco_info->gfx9_gs_ring_lds_size = radv->gs_ring_info.lds_size;
}

View File

@@ -2471,6 +2471,7 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
radv_nir_shader_info_init(&stages[i].info);
radv_nir_shader_info_pass(device, stages[i].nir, pipeline_layout, pipeline_key,
pipeline->type,
&stages[i].info);
}
@@ -2998,7 +2999,7 @@ radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline,
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
struct radv_shader_info info = {0};
radv_nir_shader_info_pass(device, nir, pipeline_layout, pipeline_key, &info);
radv_nir_shader_info_pass(device, nir, pipeline_layout, pipeline_key, pipeline->type, &info);
info.wave_size = 64; /* Wave32 not supported. */
info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
info.so = gs_info->so;

View File

@@ -2848,6 +2848,7 @@ struct radv_shader_info;
void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
const struct radv_pipeline_layout *layout,
const struct radv_pipeline_key *pipeline_key,
const enum radv_pipeline_type pipeline_type,
struct radv_shader_info *info);
void radv_nir_shader_info_init(struct radv_shader_info *info);

View File

@@ -359,6 +359,7 @@ struct radv_shader_info {
bool uses_ray_launch_size;
bool uses_dynamic_rt_callable_stack;
bool uses_rt;
bool uses_full_subgroups;
} cs;
struct {
uint64_t tes_inputs_read;

View File

@@ -683,6 +683,7 @@ void
radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
const struct radv_pipeline_layout *layout,
const struct radv_pipeline_key *pipeline_key,
const enum radv_pipeline_type pipeline_type,
struct radv_shader_info *info)
{
struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
@@ -823,6 +824,16 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
case MESA_SHADER_TASK:
info->workgroup_size =
ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
/* Allow the compiler to assume that the shader always has full subgroups,
* meaning that the initial EXEC mask is -1 in all waves (all lanes enabled).
* This assumption is incorrect for ray tracing and internal (meta) shaders
* because they can use unaligned dispatch.
*/
info->cs.uses_full_subgroups =
pipeline_type != RADV_PIPELINE_RAY_TRACING &&
!nir->info.internal &&
(info->workgroup_size % info->wave_size) == 0;
break;
case MESA_SHADER_MESH:
/* Already computed in gather_shader_info_mesh(). */