anv: disable SIMD16 for RT shaders

Since divergence is a lot more likely in RT shaders than in compute
shaders, it makes sense to limit ourselves to SIMD8.

The raygen trampoline shader, however, defaults to SIMD16 since it is uniform.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16970>
Author:       Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date:         2022-05-03 22:12:57 +03:00
Committed by: Marge Bot
Commit:       23c7142cd6 (parent 5814436159)
3 changed files with 13 additions and 4 deletions
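
In short, the change has two halves: anv now tags every ray-tracing stage's
NIR with a required subgroup size, and the brw compiler only builds the SIMD
variants that the tag allows. Below is a minimal sketch of that flow, reusing
Mesa's nir_shader/shader_info types and SUBGROUP_SIZE_* values; the two
helpers (pick_rt_subgroup_size, simd_width_allowed) are illustrative names,
not functions added by this commit.

    #include "nir.h"   /* assumes Mesa's NIR headers are on the include path */

    /* Driver side (anv): divergent RT stages are pinned to SIMD8, while the
     * uniform raygen trampoline is left at SIMD16. */
    static void
    pick_rt_subgroup_size(nir_shader *nir, bool is_raygen_trampoline)
    {
       nir->info.subgroup_size = is_raygen_trampoline ?
          SUBGROUP_SIZE_REQUIRE_16 : SUBGROUP_SIZE_REQUIRE_8;
    }

    /* Compiler side (brw): a SIMD width is only attempted when the shader
     * either allows any size (VARYING) or requires exactly that width. */
    static bool
    simd_width_allowed(const nir_shader *shader, unsigned width)
    {
       return shader->info.subgroup_size == SUBGROUP_SIZE_VARYING ||
              (width == 8 &&
               shader->info.subgroup_size == SUBGROUP_SIZE_REQUIRE_8) ||
              (width == 16 &&
               shader->info.subgroup_size == SUBGROUP_SIZE_REQUIRE_16);
    }

The INTEL_DEBUG no8/no16 overrides still apply on top of this, as the diff
below shows.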

src/intel/compiler/brw_fs.cpp

@@ -7783,7 +7783,9 @@ compile_single_bs(const struct brw_compiler *compiler, void *log_data,
    bool has_spilled = false;
 
    uint8_t simd_size = 0;
-   if (!INTEL_DEBUG(DEBUG_NO8)) {
+   if ((shader->info.subgroup_size == SUBGROUP_SIZE_VARYING ||
+        shader->info.subgroup_size == SUBGROUP_SIZE_REQUIRE_8) &&
+       !INTEL_DEBUG(DEBUG_NO8)) {
       v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
                           &prog_data->base, shader,
                           8, debug_enabled);
@@ -7801,7 +7803,9 @@ compile_single_bs(const struct brw_compiler *compiler, void *log_data,
       }
    }
 
-   if (!has_spilled && !INTEL_DEBUG(DEBUG_NO16)) {
+   if ((shader->info.subgroup_size == SUBGROUP_SIZE_VARYING ||
+        shader->info.subgroup_size == SUBGROUP_SIZE_REQUIRE_16) &&
+       !has_spilled && !INTEL_DEBUG(DEBUG_NO16)) {
       v16 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
                            &prog_data->base, shader,
                            16, debug_enabled);

src/intel/compiler/brw_nir.c

@@ -1514,7 +1514,8 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
    case SUBGROUP_SIZE_REQUIRE_8:
    case SUBGROUP_SIZE_REQUIRE_16:
    case SUBGROUP_SIZE_REQUIRE_32:
-      assert(gl_shader_stage_uses_workgroup(info->stage));
+      assert(gl_shader_stage_uses_workgroup(info->stage) ||
+             (info->stage >= MESA_SHADER_RAYGEN && info->stage <= MESA_SHADER_CALLABLE));
       /* These enum values are expressly chosen to be equal to the subgroup
        * size that they require.
        */
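
The in-tree comment quoted above explains why the relaxed assert is all that
is needed here: for the REQUIRE_* cases the enum value itself already is the
required subgroup size, so no lookup table is involved. A tiny sketch of that
property, assuming only struct shader_info and the SUBGROUP_SIZE_REQUIRE_*
values from Mesa's shader_enums.h (required_size is a hypothetical helper,
not code from this commit):

    #include "shader_info.h"   /* struct shader_info; pulls in SUBGROUP_SIZE_* */

    /* The REQUIRE_N enumerators are defined as N, so the mapping is the
     * identity: REQUIRE_8 -> 8, REQUIRE_16 -> 16, REQUIRE_32 -> 32. */
    static unsigned
    required_size(const struct shader_info *info)
    {
       return (unsigned)info->subgroup_size;
    }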

src/intel/vulkan/anv_pipeline.c

@@ -2706,6 +2706,8 @@ anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
+      stages[i].nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
+
       anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i],
                              layout, false /* use_primitive_replication */);
@@ -2885,7 +2887,7 @@ anv_device_init_rt_shaders(struct anv_device *device)
       nir_shader *trampoline_nir =
          brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
 
-      trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
+      trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_16;
 
       struct anv_pipeline_bind_map bind_map = {
          .surface_count = 0,
@@ -2943,6 +2945,8 @@ anv_device_init_rt_shaders(struct anv_device *device)
       nir_shader *trivial_return_nir =
          brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);
 
+      trivial_return_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
+
       NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, device->info);
 
       struct anv_pipeline_bind_map bind_map = {