radv, aco: Add uses_full_subgroups to compute shader info.
Allow the compiler to assume that the shader always has full subgroups, meaning that the initial EXEC mask is -1 in all waves (all lanes enabled). This assumption is incorrect for ray tracing and internal (meta) shaders because they can use unaligned dispatch. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20670>
This commit is contained in:
@@ -117,6 +117,7 @@ struct aco_shader_info {
|
|||||||
} ps;
|
} ps;
|
||||||
struct {
|
struct {
|
||||||
uint8_t subgroup_size;
|
uint8_t subgroup_size;
|
||||||
|
bool uses_full_subgroups;
|
||||||
} cs;
|
} cs;
|
||||||
|
|
||||||
uint32_t gfx9_gs_ring_lds_size;
|
uint32_t gfx9_gs_ring_lds_size;
|
||||||
|
@@ -87,6 +87,7 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info,
|
|||||||
ASSIGN_FIELD(ps.num_interp);
|
ASSIGN_FIELD(ps.num_interp);
|
||||||
ASSIGN_FIELD(ps.spi_ps_input);
|
ASSIGN_FIELD(ps.spi_ps_input);
|
||||||
ASSIGN_FIELD(cs.subgroup_size);
|
ASSIGN_FIELD(cs.subgroup_size);
|
||||||
|
ASSIGN_FIELD(cs.uses_full_subgroups);
|
||||||
aco_info->gfx9_gs_ring_lds_size = radv->gs_ring_info.lds_size;
|
aco_info->gfx9_gs_ring_lds_size = radv->gs_ring_info.lds_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -2471,6 +2471,7 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
|
|||||||
|
|
||||||
radv_nir_shader_info_init(&stages[i].info);
|
radv_nir_shader_info_init(&stages[i].info);
|
||||||
radv_nir_shader_info_pass(device, stages[i].nir, pipeline_layout, pipeline_key,
|
radv_nir_shader_info_pass(device, stages[i].nir, pipeline_layout, pipeline_key,
|
||||||
|
pipeline->type,
|
||||||
&stages[i].info);
|
&stages[i].info);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2998,7 +2999,7 @@ radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline,
|
|||||||
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||||
|
|
||||||
struct radv_shader_info info = {0};
|
struct radv_shader_info info = {0};
|
||||||
radv_nir_shader_info_pass(device, nir, pipeline_layout, pipeline_key, &info);
|
radv_nir_shader_info_pass(device, nir, pipeline_layout, pipeline_key, pipeline->type, &info);
|
||||||
info.wave_size = 64; /* Wave32 not supported. */
|
info.wave_size = 64; /* Wave32 not supported. */
|
||||||
info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
|
info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
|
||||||
info.so = gs_info->so;
|
info.so = gs_info->so;
|
||||||
|
@@ -2848,6 +2848,7 @@ struct radv_shader_info;
|
|||||||
void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
|
void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
|
||||||
const struct radv_pipeline_layout *layout,
|
const struct radv_pipeline_layout *layout,
|
||||||
const struct radv_pipeline_key *pipeline_key,
|
const struct radv_pipeline_key *pipeline_key,
|
||||||
|
const enum radv_pipeline_type pipeline_type,
|
||||||
struct radv_shader_info *info);
|
struct radv_shader_info *info);
|
||||||
|
|
||||||
void radv_nir_shader_info_init(struct radv_shader_info *info);
|
void radv_nir_shader_info_init(struct radv_shader_info *info);
|
||||||
|
@@ -359,6 +359,7 @@ struct radv_shader_info {
|
|||||||
bool uses_ray_launch_size;
|
bool uses_ray_launch_size;
|
||||||
bool uses_dynamic_rt_callable_stack;
|
bool uses_dynamic_rt_callable_stack;
|
||||||
bool uses_rt;
|
bool uses_rt;
|
||||||
|
bool uses_full_subgroups;
|
||||||
} cs;
|
} cs;
|
||||||
struct {
|
struct {
|
||||||
uint64_t tes_inputs_read;
|
uint64_t tes_inputs_read;
|
||||||
|
@@ -683,6 +683,7 @@ void
|
|||||||
radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
|
radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
|
||||||
const struct radv_pipeline_layout *layout,
|
const struct radv_pipeline_layout *layout,
|
||||||
const struct radv_pipeline_key *pipeline_key,
|
const struct radv_pipeline_key *pipeline_key,
|
||||||
|
const enum radv_pipeline_type pipeline_type,
|
||||||
struct radv_shader_info *info)
|
struct radv_shader_info *info)
|
||||||
{
|
{
|
||||||
struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
|
struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
|
||||||
@@ -823,6 +824,16 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||||||
case MESA_SHADER_TASK:
|
case MESA_SHADER_TASK:
|
||||||
info->workgroup_size =
|
info->workgroup_size =
|
||||||
ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
|
ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
|
||||||
|
|
||||||
|
/* Allow the compiler to assume that the shader always has full subgroups,
|
||||||
|
* meaning that the initial EXEC mask is -1 in all waves (all lanes enabled).
|
||||||
|
* This assumption is incorrect for ray tracing and internal (meta) shaders
|
||||||
|
* because they can use unaligned dispatch.
|
||||||
|
*/
|
||||||
|
info->cs.uses_full_subgroups =
|
||||||
|
pipeline_type != RADV_PIPELINE_RAY_TRACING &&
|
||||||
|
!nir->info.internal &&
|
||||||
|
(info->workgroup_size % info->wave_size) == 0;
|
||||||
break;
|
break;
|
||||||
case MESA_SHADER_MESH:
|
case MESA_SHADER_MESH:
|
||||||
/* Already computed in gather_shader_info_mesh(). */
|
/* Already computed in gather_shader_info_mesh(). */
|
||||||
|
Reference in New Issue
Block a user