anv: disable SIMD16 for RT shaders

Since divergence is a lot more likely in RT shaders than in compute
shaders, it makes sense to limit ourselves to SIMD8.

The raygen trampoline shader, however, defaults to SIMD16 since it is uniform.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16970>
Author:       Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date:         2022-05-03 22:12:57 +03:00
Committed by: Marge Bot
Commit:       23c7142cd6 (parent 5814436159)
3 changed files with 13 additions and 4 deletions
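
In short, the change has two halves: anv now tags every ray-tracing stage's
NIR with a required subgroup size, and the brw compiler only builds the SIMD
variants that the tag allows. Below is a minimal sketch of that flow, reusing
Mesa's nir_shader/shader_info types and SUBGROUP_SIZE_* values; the two
helpers (pick_rt_subgroup_size, simd_width_allowed) are illustrative names,
not functions added by this commit.

    #include "nir.h"   /* assumes Mesa's NIR headers are on the include path */

    /* Driver side (anv): divergent RT stages are pinned to SIMD8, while the
     * uniform raygen trampoline is left at SIMD16. */
    static void
    pick_rt_subgroup_size(nir_shader *nir, bool is_raygen_trampoline)
    {
       nir->info.subgroup_size = is_raygen_trampoline ?
          SUBGROUP_SIZE_REQUIRE_16 : SUBGROUP_SIZE_REQUIRE_8;
    }

    /* Compiler side (brw): a SIMD width is only attempted when the shader
     * either allows any size (VARYING) or requires exactly that width. */
    static bool
    simd_width_allowed(const nir_shader *shader, unsigned width)
    {
       return shader->info.subgroup_size == SUBGROUP_SIZE_VARYING ||
              (width == 8 &&
               shader->info.subgroup_size == SUBGROUP_SIZE_REQUIRE_8) ||
              (width == 16 &&
               shader->info.subgroup_size == SUBGROUP_SIZE_REQUIRE_16);
    }

The INTEL_DEBUG no8/no16 overrides still apply on top of this, as the diff
below shows.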

src/intel/compiler/brw_fs.cpp

@@ -7783,7 +7783,9 @@ compile_single_bs(const struct brw_compiler *compiler, void *log_data,
    bool has_spilled = false;
 
    uint8_t simd_size = 0;
-   if (!INTEL_DEBUG(DEBUG_NO8)) {
+   if ((shader->info.subgroup_size == SUBGROUP_SIZE_VARYING ||
+        shader->info.subgroup_size == SUBGROUP_SIZE_REQUIRE_8) &&
+       !INTEL_DEBUG(DEBUG_NO8)) {
       v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
                           &prog_data->base, shader,
                           8, debug_enabled);
@@ -7801,7 +7803,9 @@ compile_single_bs(const struct brw_compiler *compiler, void *log_data,
       }
    }
 
-   if (!has_spilled && !INTEL_DEBUG(DEBUG_NO16)) {
+   if ((shader->info.subgroup_size == SUBGROUP_SIZE_VARYING ||
+        shader->info.subgroup_size == SUBGROUP_SIZE_REQUIRE_16) &&
+       !has_spilled && !INTEL_DEBUG(DEBUG_NO16)) {
       v16 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
                            &prog_data->base, shader,
                            16, debug_enabled);

src/intel/compiler/brw_nir.c

@@ -1514,7 +1514,8 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
    case SUBGROUP_SIZE_REQUIRE_8:
    case SUBGROUP_SIZE_REQUIRE_16:
    case SUBGROUP_SIZE_REQUIRE_32:
-      assert(gl_shader_stage_uses_workgroup(info->stage));
+      assert(gl_shader_stage_uses_workgroup(info->stage) ||
+             (info->stage >= MESA_SHADER_RAYGEN && info->stage <= MESA_SHADER_CALLABLE));
       /* These enum values are expressly chosen to be equal to the subgroup
        * size that they require.
        */
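
The in-tree comment quoted above explains why the relaxed assert is all that
is needed here: for the REQUIRE_* cases the enum value itself already is the
required subgroup size, so no lookup table is involved. A tiny sketch of that
property, assuming only struct shader_info and the SUBGROUP_SIZE_REQUIRE_*
values from Mesa's shader_enums.h (required_size is a hypothetical helper,
not code from this commit):

    #include "shader_info.h"   /* struct shader_info; pulls in SUBGROUP_SIZE_* */

    /* The REQUIRE_N enumerators are defined as N, so the mapping is the
     * identity: REQUIRE_8 -> 8, REQUIRE_16 -> 16, REQUIRE_32 -> 32. */
    static unsigned
    required_size(const struct shader_info *info)
    {
       return (unsigned)info->subgroup_size;
    }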

src/intel/vulkan/anv_pipeline.c

@@ -2706,6 +2706,8 @@ anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
+      stages[i].nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
+
       anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i],
                              layout, false /* use_primitive_replication */);
@@ -2885,7 +2887,7 @@ anv_device_init_rt_shaders(struct anv_device *device)
       nir_shader *trampoline_nir =
          brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
 
-      trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
+      trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_16;
 
       struct anv_pipeline_bind_map bind_map = {
          .surface_count = 0,
@@ -2943,6 +2945,8 @@ anv_device_init_rt_shaders(struct anv_device *device)
       nir_shader *trivial_return_nir =
          brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);
 
+      trivial_return_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
+
       NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, device->info);
 
       struct anv_pipeline_bind_map bind_map = {