vtn/opencl: Hook up OpenCL async copy and group wait opcodes via libclc

Adds an additional hook for spirv_to_nir to handle a core opcode via the OpenCL libclc infrastructure, and adds handling for SpvOpGroupAsyncCopy and SpvOpGroupWaitEvents. Reviewed-by: Dave Airlie <airlied@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6035>
2020-09-25 12:13:17 -07:00
parent 00261d883d
commit 6aee49578f
3 changed files with 67 additions and 0 deletions
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -5500,6 +5500,11 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
   case SpvOpLifetimeStop:
      break;

+   case SpvOpGroupAsyncCopy:
+   case SpvOpGroupWaitEvents:
+      vtn_handle_opencl_core_instruction(b, opcode, w, count);
+      break;
+
   default:
      vtn_fail_with_opcode("Unhandled opcode", opcode);
   }
--- a/src/compiler/spirv/vtn_opencl.c
+++ b/src/compiler/spirv/vtn_opencl.c
@@ -582,6 +582,49 @@ handle_special(struct vtn_builder *b, uint32_t opcode,
   return ret;
 }

+/**
+ * libclc-backed handler for the core SPIR-V opcodes SpvOpGroupAsyncCopy and
+ * SpvOpGroupWaitEvents.
+ *
+ * Selects the libclc function name for the opcode, patches entries of
+ * src_types in place where the call needs a different argument type, and
+ * hands the call to call_mangled_function().
+ *
+ * \param opcode     core SPIR-V opcode (cast to SpvOp for the comparison)
+ * \param num_srcs   number of entries in srcs / src_types
+ * \param srcs       source values, forwarded unchanged
+ * \param src_types  source types; may be modified in place (see below)
+ * \param dest_type  result type, forwarded unchanged
+ *
+ * \return the value loaded from the call's return deref, or NULL when the
+ *         opcode is not one of the two above, when call_mangled_function()
+ *         reports failure, or when no return deref was produced.
+ */
+static nir_ssa_def *
+handle_core(struct vtn_builder *b, uint32_t opcode,
+            unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
+            const struct vtn_type *dest_type)
+{
+   const char *libclc_name;
+   /* NOTE(review): assumed to be a per-argument const-qualifier bitmask used
+    * for name mangling (bit 1 = second source) -- confirm against
+    * call_mangled_function().
+    */
+   uint32_t const_mask;
+
+   if ((SpvOp)opcode == SpvOpGroupAsyncCopy) {
+      /* libclc ships no 3-component overloads of the async copy functions.
+       * The CLC spec states that async_work_group_copy and
+       * async_work_group_strided_copy on 3-component vector types behave as
+       * their 4-component counterparts, so every pointer-to-vec3 source type
+       * is swapped for a pointer-to-vec4 in the same storage class.
+       */
+      for (unsigned idx = 0; idx < num_srcs; ++idx) {
+         struct vtn_type *arg_type = src_types[idx];
+
+         if (arg_type->base_type != vtn_base_type_pointer)
+            continue;
+
+         const struct vtn_type *pointee = arg_type->deref;
+         if (pointee->base_type != vtn_base_type_vector)
+            continue;
+         if (pointee->length != 3)
+            continue;
+
+         struct vtn_type *vec4_type =
+            get_vtn_type_for_glsl_type(b,
+                                       glsl_replace_vector_type(pointee->type, 4));
+         src_types[idx] = get_pointer_type(b, vec4_type,
+                                           arg_type->storage_class);
+      }
+
+      libclc_name = "async_work_group_strided_copy";
+      const_mask = 1u << 1;
+   } else if ((SpvOp)opcode == SpvOpGroupWaitEvents) {
+      /* The first source is looked up as a GLSL int; presumably because
+       * wait_group_events() takes a signed count -- confirm against libclc.
+       */
+      src_types[0] = get_vtn_type_for_glsl_type(b, glsl_int_type());
+
+      libclc_name = "wait_group_events";
+      const_mask = 0;
+   } else {
+      return NULL;
+   }
+
+   nir_deref_instr *result_deref = NULL;
+   if (!call_mangled_function(b, libclc_name, const_mask, num_srcs, src_types,
+                              dest_type, srcs, &result_deref))
+      return NULL;
+
+   if (result_deref == NULL)
+      return NULL;
+
+   return nir_load_deref(&b->nb, result_deref);
+}
+
+
 static void
 _handle_v_load_store(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                     const uint32_t *w, unsigned count, bool load)
@@ -888,3 +931,20 @@ vtn_handle_opencl_instruction(struct vtn_builder *b, SpvOp ext_opcode,
      return false;
   }
 }
+
+/**
+ * Routes core SPIR-V opcodes (as opposed to OpenCL extended-instruction-set
+ * opcodes) that are implemented through the libclc path to handle_core().
+ *
+ * \param opcode  core SPIR-V opcode of the instruction
+ * \param w       words of the instruction; offsets below follow the SPIR-V
+ *                operand layout with w[0] being the opcode word
+ * \param count   number of words in w
+ *
+ * \return true if the opcode was dispatched, false if it is not handled here.
+ */
+bool
+vtn_handle_opencl_core_instruction(struct vtn_builder *b, SpvOp opcode,
+                                   const uint32_t *w, unsigned count)
+{
+   switch (opcode) {
+   case SpvOpGroupAsyncCopy:
+      /* Per the SPIR-V spec: w[1] result type, w[2] result id, w[3] execution
+       * scope; the remaining operands start at w[4]. The scope word is
+       * skipped, and w + 1 is passed so handle_instr() can reach the
+       * result type/id (presumably its destination words -- confirm against
+       * handle_instr()).
+       */
+      handle_instr(b, opcode, w + 4, count - 4, w + 1, handle_core);
+      return true;
+   case SpvOpGroupWaitEvents:
+      /* No result; w[1] is the execution scope, operands start at w[2]. */
+      handle_instr(b, opcode, w + 2, count - 2, NULL, handle_core);
+      return true;
+   default:
+      return false;
+   }
+}
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -889,6 +889,8 @@ bool vtn_handle_glsl450_instruction(struct vtn_builder *b, SpvOp ext_opcode,

 bool vtn_handle_opencl_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                                   const uint32_t *words, unsigned count);
+bool vtn_handle_opencl_core_instruction(struct vtn_builder *b, SpvOp opcode,
+                                        const uint32_t *w, unsigned count);

 struct vtn_builder* vtn_create_builder(const uint32_t *words, size_t word_count,
                                       gl_shader_stage stage, const char *entry_point_name,