anv: add direct descriptor support to apply_layout

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21645>
Author:       Lionel Landwerlin
Date:         2023-02-24 20:02:57 +02:00
Committed by: Marge Bot
Parent:       1d24d0bdcc
Commit:       06dfd216d3
5 changed files with 825 additions and 291 deletions


@@ -74,6 +74,10 @@ genX_bits_included_symbols = [
'RENDER_SURFACE_STATE::Green Clear Color',
'RENDER_SURFACE_STATE::Blue Clear Color',
'RENDER_SURFACE_STATE::Alpha Clear Color',
'RENDER_SURFACE_STATE::Width',
'RENDER_SURFACE_STATE::Height',
'RENDER_SURFACE_STATE::Depth',
'RENDER_SURFACE_STATE::Surface Type',
'CLEAR_COLOR',
'VERTEX_BUFFER_STATE::Buffer Starting Address',
'CPS_STATE',


@@ -31,6 +31,18 @@
extern "C" {
#endif
/* This map represents a mapping where the key is the NIR
* nir_intrinsic_resource_intel::block index. It allows mapping bindless UBO
* accesses back to their descriptor entries.
*
* This map only lives temporarily, between the anv_nir_apply_pipeline_layout()
* and anv_nir_compute_push_layout() passes.
*/
struct anv_pipeline_push_map {
uint32_t block_count;
struct anv_pipeline_binding *block_to_descriptor;
};
bool anv_check_for_primitive_replication(struct anv_device *device,
VkShaderStageFlags stages,
nir_shader **shaders,
@@ -71,7 +83,9 @@ void anv_nir_apply_pipeline_layout(nir_shader *shader,
bool robust_buffer_access,
bool independent_sets,
const struct anv_pipeline_sets_layout *layout,
struct anv_pipeline_bind_map *map);
struct anv_pipeline_bind_map *map,
struct anv_pipeline_push_map *push_map,
void *push_map_mem_ctx);
void anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
@@ -79,6 +93,7 @@ void anv_nir_compute_push_layout(nir_shader *nir,
bool fragment_dynamic,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
const struct anv_pipeline_push_map *push_map,
void *mem_ctx);
void anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,

File diff suppressed because it is too large
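This suppressed diff is most likely the apply_layout pass itself, which carries the bulk of the commit's 825 added lines and is what fills the new anv_pipeline_push_map declared above. Since it is not shown here, the following is a minimal, hypothetical sketch of how such a pass could populate the map from its bind map; the helper name, the one-to-one mirroring of surface_to_descriptor, and the includes are assumptions for illustration, not the committed implementation:

#include <string.h>
#include "util/ralloc.h"
#include "anv_private.h"
#include "anv_nir.h"

/* Hypothetical helper: copy the bind map's surface bindings into the push
 * map so that the block index recorded on nir_intrinsic_resource_intel can
 * be translated back to a descriptor entry later, in
 * anv_nir_compute_push_layout(). */
static void
example_fill_push_map(const struct anv_pipeline_bind_map *map,
                      struct anv_pipeline_push_map *push_map,
                      void *push_map_mem_ctx)
{
   /* Assume one entry per binding-table surface, so block indices emitted
    * by the pass index straight into this array. */
   push_map->block_count = map->surface_count;
   push_map->block_to_descriptor =
      ralloc_array(push_map_mem_ctx, struct anv_pipeline_binding,
                   push_map->block_count);
   memcpy(push_map->block_to_descriptor, map->surface_to_descriptor,
          push_map->block_count * sizeof(*push_map->block_to_descriptor));
}

Allocating the array on push_map_mem_ctx fits the header comment: the mapping only needs to survive between anv_nir_apply_pipeline_layout() and anv_nir_compute_push_layout(), and anv_pipeline_lower_nir() below passes its compile-time mem_ctx for exactly that.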


@@ -35,6 +35,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
bool fragment_dynamic,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
const struct anv_pipeline_push_map *push_map,
void *mem_ctx)
{
const struct brw_compiler *compiler = pdevice->compiler;
@@ -257,8 +258,9 @@ anv_nir_compute_push_layout(nir_shader *nir,
continue;
}
assert(ubo_range->block < push_map->block_count);
const struct anv_pipeline_binding *binding =
&map->surface_to_descriptor[ubo_range->block];
&push_map->block_to_descriptor[ubo_range->block];
map->push_ranges[n++] = (struct anv_push_range) {
.set = binding->set,
@@ -299,6 +301,16 @@ anv_nir_compute_push_layout(nir_shader *nir,
(fs_msaa_flags_offset - push_start) / 4;
}
#if 0
fprintf(stderr, "stage=%s push ranges:\n", gl_shader_stage_name(nir->info.stage));
for (unsigned i = 0; i < ARRAY_SIZE(map->push_ranges); i++)
fprintf(stderr, " range%i: %03u-%03u set=%u index=%u\n", i,
map->push_ranges[i].start,
map->push_ranges[i].length,
map->push_ranges[i].set,
map->push_ranges[i].index);
#endif
/* Now that we're done computing the push constant portion of the
* bind map, hash it. This lets us quickly determine if the actual
* mapping has changed and not just a no-op pipeline change.

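The trailing comment in this hunk explains that the push-constant portion of the bind map is hashed so that a pipeline change which leaves the actual mapping untouched can be detected cheaply. As a rough illustration of the idea (not the committed code), a digest over just the push ranges using Mesa's util/mesa-sha1.h helper could look like the following; the helper name and the caller-provided output buffer are assumptions:

#include "util/mesa-sha1.h"
#include "anv_private.h"

/* Hypothetical sketch: digest only the push ranges, so two bind maps with an
 * identical push layout hash equal even if unrelated state differs. */
static void
example_hash_push_ranges(const struct anv_pipeline_bind_map *map,
                         unsigned char sha1_out[20])
{
   _mesa_sha1_compute(map->push_ranges, sizeof(map->push_ranges), sha1_out);
}

Because push_ranges is a fixed-size array inside the bind map (see the ARRAY_SIZE() loop in the debug dump above), hashing sizeof(map->push_ranges) bytes covers every range slot, used or not.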

@@ -978,11 +978,13 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
stage->push_desc_info.used_descriptors =
anv_nir_compute_used_push_descriptors(nir, layout);
struct anv_pipeline_push_map push_map = {};
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
NIR_PASS_V(nir, anv_nir_apply_pipeline_layout,
pdevice, pipeline->device->robust_buffer_access,
layout->independent_sets,
layout, &stage->bind_map);
layout, &stage->bind_map, &push_map, mem_ctx);
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
anv_nir_ubo_addr_format(pdevice, pipeline->device->robust_buffer_access));
@@ -993,8 +995,14 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
* calculations often create and then constant-fold so that, when we
* get to anv_nir_lower_ubo_loads, we can detect constant offsets.
*/
NIR_PASS(_, nir, nir_copy_prop);
NIR_PASS(_, nir, nir_opt_constant_folding);
bool progress;
do {
progress = false;
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_copy_prop);
NIR_PASS(progress, nir, nir_opt_constant_folding);
NIR_PASS(progress, nir, nir_opt_dce);
} while (progress);
/* Required for nir_divergence_analysis() which is needed for
* anv_nir_lower_ubo_loads.
@@ -1007,7 +1015,9 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
NIR_PASS(_, nir, nir_opt_remove_phis);
enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access;
nir_lower_non_uniform_texture_access |
nir_lower_non_uniform_image_access |
nir_lower_non_uniform_get_ssbo_size;
/* In practice, most shaders do not have non-uniform-qualified
* accesses (see
@@ -1038,7 +1048,7 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
NIR_PASS_V(nir, anv_nir_compute_push_layout,
pdevice, pipeline->device->robust_buffer_access,
anv_graphics_pipeline_stage_fragment_dynamic(stage),
prog_data, &stage->bind_map, mem_ctx);
prog_data, &stage->bind_map, &push_map, mem_ctx);
NIR_PASS_V(nir, anv_nir_lower_resource_intel, pdevice,
pipeline->layout.type);
@@ -3193,6 +3203,15 @@ VkResult anv_CreateGraphicsPipelines(
return result;
}
static bool
should_remat_cb(nir_instr *instr, void *data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
return nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_resource_intel;
}
static VkResult
compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
struct vk_pipeline_cache *cache,
@@ -3214,6 +3233,7 @@ compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
.localized_loads = true,
.vectorizer_callback = brw_nir_should_vectorize_mem,
.vectorizer_data = NULL,
.should_remat_callback = should_remat_cb,
};
NIR_PASS(_, nir, nir_lower_shader_calls, &opts,