iris: Enable threaded shader compilation

There are a couple minor things that can be improved: 1. Eliminate (or reduce) the dynamic allocation of the threaded_compile_job. 2. For apps like shader-db, improve the case where nr_threads=0. Right now this adds thread switching and mutex overhead. 3. Other performance improvements? iris_uncompiled_shader::variants has some special properties that make it ripe for replacement with a lockless list. Without gathering some data, it's hard to guess what impact that could have. v2: Fix whitespace and formatting issues. Noticed by Ken. s/threaded_compile_job/iris_threaded_compile_job/g. Suggested by Ken. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11229>
2021-06-04 14:17:42 -07:00
parent 9011cc7405
commit 42c34e1ac8
5 changed files with 135 additions and 6 deletions
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -38,6 +38,7 @@
 #include "util/u_atomic.h"
 #include "util/u_upload_mgr.h"
 #include "util/debug.h"
+#include "util/u_async_debug.h"
 #include "compiler/nir/nir.h"
 #include "compiler/nir/nir_builder.h"
 #include "compiler/nir/nir_serialize.h"
@@ -54,6 +55,14 @@
   .base.tex.compressed_multisample_layout_mask = ~0,    \
   .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)

+struct iris_threaded_compile_job {
+   struct iris_screen *screen;
+   struct u_upload_mgr *uploader;
+   struct pipe_debug_callback *dbg;
+   struct iris_uncompiled_shader *ish;
+   struct iris_compiled_shader *shader;
+};
+
 static unsigned
 get_new_program_id(struct iris_screen *screen)
 {
@@ -1174,6 +1183,42 @@ find_or_add_variant(const struct iris_screen *screen,
   return variant;
 }

+static void
+iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
+                                 UNUSED int thread_index)
+{
+   free(_job);
+}
+
+static void
+iris_schedule_compile(struct iris_screen *screen,
+                      struct util_queue_fence *ready_fence,
+                      struct pipe_debug_callback *dbg,
+                      struct iris_threaded_compile_job *job,
+                      util_queue_execute_func execute)
+
+{
+   util_queue_fence_init(ready_fence);
+
+   struct util_async_debug_callback async_debug;
+
+   if (dbg) {
+      u_async_debug_init(&async_debug);
+      job->dbg = &async_debug.base;
+   }
+
+   util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
+                      iris_threaded_compile_job_delete, 0);
+
+   if (screen->driconf.sync_compile || dbg)
+      util_queue_fence_wait(ready_fence);
+
+   if (dbg) {
+      u_async_debug_drain(&async_debug, dbg);
+      u_async_debug_cleanup(&async_debug);
+   }
+}
+
 /**
 * Compile a vertex shader, and upload the assembly.
 */
@@ -2457,12 +2502,17 @@ iris_create_compute_state(struct pipe_context *ctx,
 }

 static void
-iris_compile_shader(struct iris_screen *screen,
-                    struct u_upload_mgr *uploader,
-                    struct pipe_debug_callback *dbg,
-                    struct iris_uncompiled_shader *ish,
-                    struct iris_compiled_shader *shader)
+iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
 {
+   const struct iris_threaded_compile_job *job =
+      (struct iris_threaded_compile_job *) _job;
+
+   struct iris_screen *screen = job->screen;
+   struct u_upload_mgr *uploader = job->uploader;
+   struct pipe_debug_callback *dbg = job->dbg;
+   struct iris_uncompiled_shader *ish = job->ish;
+   struct iris_compiled_shader *shader = job->shader;
+
   switch (ish->nir->info.stage) {
   case MESA_SHADER_VERTEX:
      iris_compile_vs(screen, uploader, dbg, ish, shader);
@@ -2615,7 +2665,17 @@ iris_create_shader_state(struct pipe_context *ctx,

      if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
                                    &key, key_size)) {
-         iris_compile_shader(screen, uploader, &ice->dbg, ish, shader);
+         assert(!util_queue_fence_is_signalled(&shader->ready));
+
+         struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
+
+         job->screen = screen;
+         job->uploader = uploader;
+         job->ish = ish;
+         job->shader = shader;
+
+         iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
+                               iris_compile_shader);
      }
   }

@@ -2643,6 +2703,7 @@ iris_destroy_shader_state(struct pipe_context *ctx, void *state)
   }

   simple_mtx_destroy(&ish->lock);
+   util_queue_fence_destroy(&ish->ready);

   ralloc_free(ish->nir);
   free(ish);