anv: rewrite internal shaders using OpenCL

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26797>
Lionel Landwerlin
2023-11-06 13:12:25 +02:00
committed by Marge Bot
parent da391650f5
commit b52e25d3a8
26 changed files with 911 additions and 1133 deletions

View File

@@ -298,7 +298,8 @@ with_any_broadcom = [
if ['x86_64'].contains(host_machine.cpu_family()) and \
get_option('intel-clc') != 'system'
# Require intel-clc with Anv & Iris (for internal shaders)
with_intel_clc = get_option('intel-clc') == 'enabled'
with_intel_clc = get_option('intel-clc') == 'enabled' or \
with_intel_vk
else
with_intel_clc = false
endif

View File

@@ -119,16 +119,12 @@ endforeach
genX_cl_included_symbols = [
# instructions
'MI_BATCH_BUFFER_START',
'3DSTATE_CONSTANT_ALL',
'3DSTATE_CONSTANT_VS',
'3DSTATE_RASTER',
'3DSTATE_INDEX_BUFFER',
'3DSTATE_VERTEX_BUFFERS',
'3DPRIMITIVE',
'3DPRIMITIVE_EXTENDED',
# structures
'3DSTATE_CONSTANT_BODY',
'VERTEX_BUFFER_STATE',
]

View File

@@ -28,6 +28,7 @@ endif
subdir('isl')
subdir('common')
subdir('compiler')
subdir('shaders')
if with_intel_hasvk or with_intel_vk or with_gallium_crocus or with_gallium_iris or with_intel_tools
subdir('perf')
endif

View File

@@ -0,0 +1,206 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "libintel_shaders.h"
void genX(write_3DSTATE_VERTEX_BUFFERS)(global void *dst_ptr,
uint32_t buffer_count)
{
struct GENX(3DSTATE_VERTEX_BUFFERS) v = {
GENX(3DSTATE_VERTEX_BUFFERS_header),
};
v.DWordLength = 1 + (buffer_count * 4) -
GENX(3DSTATE_VERTEX_BUFFERS_length_bias);
GENX(3DSTATE_VERTEX_BUFFERS_pack)(dst_ptr, &v);
}
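As a worked example of the DWordLength computation above (a sketch, assuming the usual 3D-packet length bias of 2):
/* buffer_count == 2:
 *   DWordLength = 1 + (2 * 4) - 2 = 7
 * which encodes a 9-dword packet: 1 header dword followed by
 * 2 * 4 dwords of VERTEX_BUFFER_STATE, written separately below.
 */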
void genX(write_VERTEX_BUFFER_STATE)(global void *dst_ptr,
uint32_t mocs,
uint32_t buffer_idx,
uint64_t address,
uint32_t size,
uint32_t stride)
{
bool buffer_null = address == 0;
struct GENX(VERTEX_BUFFER_STATE) v = {
.BufferPitch = stride,
.NullVertexBuffer = buffer_null,
.AddressModifyEnable = true,
.MOCS = mocs,
#if GFX_VER >= 12
.L3BypassDisable = true,
#endif
.VertexBufferIndex = buffer_idx,
.BufferStartingAddress = address,
.BufferSize = size,
};
GENX(VERTEX_BUFFER_STATE_pack)(dst_ptr, &v);
}
#if GFX_VER == 9
void genX(write_3DPRIMITIVE)(global void *dst_ptr,
bool is_predicated,
bool is_indexed,
bool uses_tbimr,
uint32_t vertex_count_per_instance,
uint32_t start_vertex_location,
uint32_t instance_count,
uint32_t start_instance_location,
uint32_t base_vertex_location)
{
struct GENX(3DPRIMITIVE) v = {
GENX(3DPRIMITIVE_header),
#if GFX_VERx10 >= 125
.TBIMREnable = uses_tbimr,
#endif
.PredicateEnable = is_predicated,
.VertexAccessType = is_indexed ? RANDOM : SEQUENTIAL,
.VertexCountPerInstance = vertex_count_per_instance,
.StartVertexLocation = start_vertex_location,
.InstanceCount = instance_count,
.StartInstanceLocation = start_instance_location,
.BaseVertexLocation = base_vertex_location,
};
GENX(3DPRIMITIVE_pack)(dst_ptr, &v);
}
#endif
#if GFX_VER >= 11
void genX(write_3DPRIMITIVE_EXTENDED)(global void *dst_ptr,
bool is_predicated,
bool is_indexed,
bool uses_tbimr,
uint32_t vertex_count_per_instance,
uint32_t start_vertex_location,
uint32_t instance_count,
uint32_t start_instance_location,
uint32_t base_vertex_location,
uint32_t param_base_vertex,
uint32_t param_base_instance,
uint32_t param_draw_id)
{
struct GENX(3DPRIMITIVE_EXTENDED) v = {
GENX(3DPRIMITIVE_EXTENDED_header),
#if GFX_VERx10 >= 125
.TBIMREnable = uses_tbimr,
#endif
.PredicateEnable = is_predicated,
.VertexAccessType = is_indexed ? RANDOM : SEQUENTIAL,
.VertexCountPerInstance = vertex_count_per_instance,
.StartVertexLocation = start_vertex_location,
.InstanceCount = instance_count,
.StartInstanceLocation = start_instance_location,
.BaseVertexLocation = base_vertex_location,
.ExtendedParameter0 = param_base_vertex,
.ExtendedParameter1 = param_base_instance,
.ExtendedParameter2 = param_draw_id,
};
GENX(3DPRIMITIVE_EXTENDED_pack)(dst_ptr, &v);
}
#endif
void genX(write_MI_BATCH_BUFFER_START)(global void *dst_ptr, uint64_t addr)
{
struct GENX(MI_BATCH_BUFFER_START) v = {
GENX(MI_BATCH_BUFFER_START_header),
.AddressSpaceIndicator = ASI_PPGTT,
.BatchBufferStartAddress = addr,
};
GENX(MI_BATCH_BUFFER_START_pack)(dst_ptr, &v);
}
void genX(write_draw)(global uint32_t *dst_ptr,
global void *indirect_ptr,
global uint32_t *draw_id_ptr,
uint32_t draw_id,
uint32_t instance_multiplier,
bool is_indexed,
bool is_predicated,
bool uses_tbimr,
bool uses_base,
bool uses_drawid,
uint32_t mocs)
{
#if GFX_VER == 9
if (uses_base || uses_drawid) {
uint32_t vertex_buffer_count =
(uses_base ? 1 : 0) + (uses_drawid ? 1 : 0);
genX(write_3DSTATE_VERTEX_BUFFERS)(dst_ptr, vertex_buffer_count);
dst_ptr += 1; /* GENX(3DSTATE_VERTEX_BUFFERS_length); */
if (uses_base) {
uint64_t base_addr = (uint64_t)indirect_ptr + (is_indexed ? 12 : 8);
genX(write_VERTEX_BUFFER_STATE)(dst_ptr, mocs, 31, base_addr, 8, 0);
dst_ptr += GENX(VERTEX_BUFFER_STATE_length);
}
if (uses_drawid) {
*draw_id_ptr = draw_id;
genX(write_VERTEX_BUFFER_STATE)(dst_ptr, mocs, 32,
(uint64_t)draw_id_ptr, 4, 0);
dst_ptr += GENX(VERTEX_BUFFER_STATE_length);
}
}
if (is_indexed) {
VkDrawIndexedIndirectCommand data =
*((global VkDrawIndexedIndirectCommand *)indirect_ptr);
genX(write_3DPRIMITIVE)(dst_ptr,
is_predicated,
is_indexed,
uses_tbimr,
data.indexCount,
data.firstIndex,
data.instanceCount * instance_multiplier,
data.firstInstance,
data.vertexOffset);
} else {
VkDrawIndirectCommand data =
*((global VkDrawIndirectCommand *)indirect_ptr);
genX(write_3DPRIMITIVE)(dst_ptr,
is_predicated,
is_indexed,
uses_tbimr,
data.vertexCount,
data.firstVertex,
data.instanceCount * instance_multiplier,
data.firstInstance,
0 /* base_vertex_location */);
}
#else
if (is_indexed) {
VkDrawIndexedIndirectCommand data =
*((global VkDrawIndexedIndirectCommand *)indirect_ptr);
genX(write_3DPRIMITIVE_EXTENDED)(dst_ptr,
is_predicated,
is_indexed,
uses_tbimr,
data.indexCount,
data.firstIndex,
data.instanceCount * instance_multiplier,
data.firstInstance,
data.vertexOffset,
data.vertexOffset,
data.firstInstance,
draw_id);
} else {
VkDrawIndirectCommand data =
*((global VkDrawIndirectCommand *)indirect_ptr);
genX(write_3DPRIMITIVE_EXTENDED)(dst_ptr,
is_predicated,
is_indexed,
uses_tbimr,
data.vertexCount,
data.firstVertex,
data.instanceCount * instance_multiplier,
data.firstInstance,
0 /* base_vertex_location */,
data.firstVertex,
data.firstInstance,
draw_id);
}
#endif
}
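For reference, a sketch of the per-draw command slot each invocation of write_draw fills (dword counts derived from the packet writers above and the removed GLSL further down):
/* Gfx9, when gl_BaseVertex/gl_BaseInstance or gl_DrawID is used:
 *   3DSTATE_VERTEX_BUFFERS header             1 dword
 *   VERTEX_BUFFER_STATE (index 31, base)      4 dwords  (if uses_base)
 *   VERTEX_BUFFER_STATE (index 32, draw id)   4 dwords  (if uses_drawid)
 *   3DPRIMITIVE                               7 dwords
 * Gfx11+:
 *   3DPRIMITIVE_EXTENDED                     10 dwords
 */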

View File

@@ -0,0 +1,77 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "libintel_shaders.h"
static void end_generated_draws(global void *dst_ptr,
uint32_t item_idx,
uint32_t draw_id, uint32_t draw_count,
uint32_t ring_count, uint32_t max_draw_count,
uint32_t flags,
uint64_t gen_addr, uint64_t end_addr)
{
uint32_t _3dprim_size_B = ((flags >> 16) & 0xff) * 4;
bool indirect_count = (flags & ANV_GENERATED_FLAG_COUNT) != 0;
bool ring_mode = (flags & ANV_GENERATED_FLAG_RING_MODE) != 0;
/* We can have an indirect draw count = 0. */
uint32_t last_draw_id = draw_count == 0 ? 0 : (min(draw_count, max_draw_count) - 1);
global void *jump_dst = draw_count == 0 ? dst_ptr : (dst_ptr + _3dprim_size_B);
if (ring_mode) {
if (draw_id == last_draw_id) {
/* Exit the ring buffer to the next user commands */
genX(write_MI_BATCH_BUFFER_START)(jump_dst, end_addr);
} else if (item_idx == (ring_count - 1)) {
/* Jump back to the generation shader to generate mode draws */
genX(write_MI_BATCH_BUFFER_START)(jump_dst, gen_addr);
}
} else {
if (draw_id == last_draw_id && draw_count < max_draw_count) {
/* Skip forward to the end of the generated draws */
genX(write_MI_BATCH_BUFFER_START)(jump_dst, end_addr);
}
}
}
void
genX(libanv_write_draw)(global void *dst_base,
global void *indirect_base,
global void *draw_id_base,
uint32_t indirect_stride,
global uint32_t *_draw_count,
uint32_t draw_base,
uint32_t instance_multiplier,
uint32_t max_draw_count,
uint32_t flags,
uint32_t ring_count,
uint64_t gen_addr,
uint64_t end_addr,
uint32_t item_idx)
{
uint32_t _3dprim_size_B = ((flags >> 16) & 0xff) * 4;
uint32_t draw_id = draw_base + item_idx;
uint32_t draw_count = *_draw_count;
global void *dst_ptr = dst_base + item_idx * _3dprim_size_B;
global void *indirect_ptr = indirect_base + draw_id * indirect_stride;
global void *draw_id_ptr = draw_id_base + item_idx * 4;
if (draw_id < min(draw_count, max_draw_count)) {
bool is_indexed = (flags & ANV_GENERATED_FLAG_INDEXED) != 0;
bool is_predicated = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0;
bool uses_tbimr = (flags & ANV_GENERATED_FLAG_TBIMR) != 0;
bool uses_base = (flags & ANV_GENERATED_FLAG_BASE) != 0;
bool uses_drawid = (flags & ANV_GENERATED_FLAG_DRAWID) != 0;
uint32_t mocs = (flags >> 8) & 0xff;
genX(write_draw)(dst_ptr, indirect_ptr, draw_id_ptr,
draw_id, instance_multiplier,
is_indexed, is_predicated,
uses_tbimr, uses_base, uses_drawid,
mocs);
}
end_generated_draws(dst_ptr, item_idx, draw_id, draw_count,
ring_count, max_draw_count, flags,
gen_addr, end_addr);
}
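A sketch of the control flow end_generated_draws implements (layout illustrative):
/* Ring mode, ring_count slots per generation pass:
 *   [draw 0][draw 1]...[draw ring_count-1][MI_BATCH_BUFFER_START]
 *   - draw_id == last_draw_id:      jump to end_addr (next user commands)
 *   - item_idx == ring_count - 1:   jump back to gen_addr (regenerate)
 * Direct mode:
 *   - last draw reached with draw_count < max_draw_count:
 *     jump over the remaining pre-allocated slots to end_addr
 * The MI_BATCH_BUFFER_START lands right after the draw's packet
 * (dst_ptr + _3dprim_size_B), or at dst_ptr itself when draw_count == 0.
 */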

View File

@@ -0,0 +1,127 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#ifndef _LIBANV_SHADERS_H_
#define _LIBANV_SHADERS_H_
/* Define stdint types compatible between the CPU and GPU for shared headers */
#ifndef __OPENCL_VERSION__
#include <stdint.h>
#include "util/macros.h"
#else
#define BITFIELD_BIT(i) (1u << (i))
typedef ulong uint64_t;
typedef uint uint32_t;
typedef ushort uint16_t;
typedef uchar uint8_t;
typedef long int64_t;
typedef int int32_t;
typedef short int16_t;
typedef char int8_t;
typedef struct VkDrawIndexedIndirectCommand {
uint32_t indexCount;
uint32_t instanceCount;
uint32_t firstIndex;
int32_t vertexOffset;
uint32_t firstInstance;
} VkDrawIndexedIndirectCommand __attribute__((aligned(4)));
typedef struct VkDrawIndirectCommand {
uint32_t vertexCount;
uint32_t instanceCount;
uint32_t firstVertex;
uint32_t firstInstance;
} VkDrawIndirectCommand __attribute__((aligned(4)));
#include "genxml/gen_macros.h"
#include "genxml/genX_cl_pack.h"
#endif
/**
* Flags for generated_draws.cl
*/
#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
/* Only used on Gfx9, means the pipeline is using gl_DrawID */
#define ANV_GENERATED_FLAG_DRAWID BITFIELD_BIT(2)
/* Only used on Gfx9, means the pipeline is using gl_BaseVertex or
* gl_BaseInstance
*/
#define ANV_GENERATED_FLAG_BASE BITFIELD_BIT(3)
/* Whether the count is indirect */
#define ANV_GENERATED_FLAG_COUNT BITFIELD_BIT(4)
/* Whether the generation shader writes to the ring buffer */
#define ANV_GENERATED_FLAG_RING_MODE BITFIELD_BIT(5)
/* Whether TBIMR tile-based rendering shall be enabled. */
#define ANV_GENERATED_FLAG_TBIMR BITFIELD_BIT(6)
/**
* Flags for query_copy.cl
*/
#define ANV_COPY_QUERY_FLAG_RESULT64 BITFIELD_BIT(0)
#define ANV_COPY_QUERY_FLAG_AVAILABLE BITFIELD_BIT(1)
#define ANV_COPY_QUERY_FLAG_DELTA BITFIELD_BIT(2)
#define ANV_COPY_QUERY_FLAG_PARTIAL BITFIELD_BIT(3)
#ifdef __OPENCL_VERSION__
void genX(write_3DSTATE_VERTEX_BUFFERS)(global void *dst_ptr,
uint32_t buffer_count);
void genX(write_VERTEX_BUFFER_STATE)(global void *dst_ptr,
uint32_t mocs,
uint32_t buffer_idx,
uint64_t address,
uint32_t size,
uint32_t stride);
#if GFX_VER == 9
void genX(write_3DPRIMITIVE)(global void *dst_ptr,
bool is_predicated,
bool is_indexed,
bool uses_tbimr,
uint32_t vertex_count_per_instance,
uint32_t start_vertex_location,
uint32_t instance_count,
uint32_t start_instance_location,
uint32_t base_vertex_location);
#endif
#if GFX_VER >= 11
void genX(write_3DPRIMITIVE_EXTENDED)(global void *dst_ptr,
bool is_predicated,
bool is_indexed,
bool uses_tbimr,
uint32_t vertex_count_per_instance,
uint32_t start_vertex_location,
uint32_t instance_count,
uint32_t start_instance_location,
uint32_t base_vertex_location,
uint32_t param_base_vertex,
uint32_t param_base_instance,
uint32_t param_draw_id);
#endif
void genX(write_MI_BATCH_BUFFER_START)(global void *dst_ptr, uint64_t addr);
void genX(write_draw)(global uint32_t *dst_ptr,
global void *indirect_ptr,
global uint32_t *draw_id_ptr,
uint32_t draw_id,
uint32_t instance_multiplier,
bool is_indexed,
bool is_predicated,
bool uses_tbimr,
bool uses_base,
bool uses_drawid,
uint32_t mocs);
#endif /* __OPENCL_VERSION__ */
#endif /* _LIBANV_SHADERS_H_ */

View File

@@ -0,0 +1,23 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
 */
#include "libintel_shaders.h"
void
genX(libanv_memcpy)(global void *dst_base,
global void *src_base,
uint num_dwords,
uint dword_offset)
{
global void *dst = dst_base + 4 * dword_offset;
global void *src = src_base + 4 * dword_offset;
if (dword_offset + 4 <= num_dwords) {
*(global uint4 *)(dst) = *(global uint4 *)(src);
} else if (dword_offset + 3 <= num_dwords) {
*(global uint3 *)(dst) = *(global uint3 *)(src);
} else if (dword_offset + 2 <= num_dwords) {
*(global uint2 *)(dst) = *(global uint2 *)(src);
} else if (dword_offset + 1 <= num_dwords) {
*(global uint *)(dst) = *(global uint *)(src);
}
}
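How this kernel is driven (see the dispatch code later in this commit): each invocation handles up to four dwords, so a sketch of the calling convention is:
/* invocations = DIV_ROUND_UP(num_dwords, 4);
 * genX(libanv_memcpy)(dst, src, num_dwords, invocation_index * 4);
 *
 * e.g. a 10-dword copy runs 3 invocations copying 4 + 4 + 2 dwords;
 * the if/else ladder above handles the sub-uint4 tail.
 */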

View File

@@ -0,0 +1,63 @@
# Copyright © 2023 Intel Corporation
# SPDX-License-Identifier: MIT
intel_float64_spv_h = custom_target(
'float64_spv.h',
input : [glsl2spirv, float64_glsl_file],
output : 'float64_spv.h',
command : [
prog_python, '@INPUT@', '@OUTPUT@',
prog_glslang,
'--create-entry', 'main',
'--vn', 'float64_spv_source',
'--glsl-version', '450',
'-Olib',
]
)
intel_shader_files = files(
'libintel_shaders.h',
'generate.cl',
'generate_draws.cl',
'memcpy.cl',
'query_copy.cl',
)
prepended_input_args = []
foreach input_arg : intel_shader_files
prepended_input_args += ['--in', input_arg]
endforeach
intel_shaders_gens = [ [ 90, 9],
[110, 11],
[120, 12],
[125, 125],
[200, 20] ]
intel_shaders = []
foreach gen : intel_shaders_gens
intel_shaders += custom_target(
'intel_gfx@0@_shaders_code.h'.format(gen[1]),
input : intel_shader_files,
output : 'intel_gfx@0@_shaders_code.h'.format(gen[1]),
command : [
prog_intel_clc, '--nir',
'--prefix', 'gfx@0@_intel_shaders'.format(gen[1]),
prepended_input_args, '-o', '@OUTPUT@', '--',
'-cl-std=cl2.0', '-D__OPENCL_VERSION__=200',
'-DGFX_VERx10=@0@'.format(gen[0]),
'-I' + join_paths(meson.current_source_dir(), '.'),
'-I' + join_paths(meson.source_root(), 'src'),
'-I' + join_paths(meson.source_root(), 'src/intel'),
'-I' + join_paths(meson.build_root(), 'src/intel'),
'-I' + join_paths(meson.source_root(), 'src/intel/genxml'),
'-include', 'opencl-c.h',
],
env: ['MESA_SHADER_CACHE_DISABLE=true'],
depends : [dep_prog_intel_clc, gen_cl_xml_pack],
)
endforeach
idep_intel_shaders = declare_dependency(
sources : intel_shaders,
include_directories : include_directories('.'),
)
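Each custom_target above emits a header exposing the serialized NIR library, consumed by genX_internal_kernels.c below. A sketch of the expected shape (the exact symbol name comes from the --prefix option and intel_clc's output format, so treat this as an assumption):
/* intel_gfx11_shaders_code.h (generated, hypothetical sketch):
 * static const uint32_t gfx11_intel_shaders_nir[] = {
 *    ... serialized NIR blob, deserialized at device creation ...
 * };
 */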

View File

@@ -0,0 +1,72 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
void
genX(libanv_query_copy)(global void *destination_base,
uint32_t destination_stride,
global void *query_data,
uint32_t first_query,
uint32_t num_queries,
uint32_t query_data_offset,
uint32_t query_stride,
uint32_t num_query_items,
uint32_t copy_flags,
uint32_t copy_item_idx)
{
if (copy_item_idx >= num_queries)
return;
bool is_result64 = (copy_flags & ANV_COPY_QUERY_FLAG_RESULT64) != 0;
bool write_available = (copy_flags & ANV_COPY_QUERY_FLAG_AVAILABLE) != 0;
bool compute_delta = (copy_flags & ANV_COPY_QUERY_FLAG_DELTA) != 0;
bool partial_result = (copy_flags & ANV_COPY_QUERY_FLAG_PARTIAL) != 0;
uint query_byte = (first_query + copy_item_idx) * query_stride;
uint query_data_byte = query_byte + query_data_offset;
global uint64_t *query = query_data + (first_query + copy_item_idx) * query_stride;
global uint64_t *dest64 = destination_base + copy_item_idx * destination_stride;
global uint32_t *dest32 = destination_base + copy_item_idx * destination_stride;
uint64_t availability = *(global uint32_t *)(query_data + query_byte);
if (write_available) {
if (is_result64)
dest64[num_query_items] = availability;
else
dest32[num_query_items] = availability;
}
for (uint32_t i = 0; i < num_query_items; i++) {
uint32_t qw_offset = 1 + i * 2;
uint64_t v;
if (compute_delta) {
struct delta64 {
uint64_t v0;
uint64_t v1;
} data = *((global struct delta64 *)&query[qw_offset]);
v = data.v1 - data.v0;
} else {
v = query[qw_offset + 0];
}
/* vkCmdCopyQueryPoolResults:
*
* "If VK_QUERY_RESULT_PARTIAL_BIT is set, then for any query that is
* unavailable, an intermediate result between zero and the final
* result value is written for that query."
*
* We write 0 for values that have not been written yet; we can't really
* provide any sensible value.
*/
if (partial_result && availability == 0)
v = 0;
if (is_result64)
dest64[i] = v;
else
dest32[i] = v;
}
}
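For reference, the destination layout this produces for a query with num_query_items == 2 and 64-bit results:
/* dest64[0] = item 0 result
 * dest64[1] = item 1 result
 * dest64[2] = availability  (only with ANV_COPY_QUERY_FLAG_AVAILABLE)
 */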

View File

@@ -39,6 +39,9 @@
struct intel_sample_positions;
struct intel_urb_config;
typedef struct nir_builder nir_builder;
typedef struct nir_shader nir_shader;
extern const uint32_t genX(vk_to_intel_cullmode)[];
extern const uint32_t genX(vk_to_intel_front_face)[];
@@ -59,6 +62,11 @@ VkResult genX(init_device_state)(struct anv_device *device);
void genX(init_cps_device_state)(struct anv_device *device);
nir_shader *genX(load_libanv_shader)(struct anv_device *device, void *mem_ctx);
uint32_t genX(call_internal_shader)(nir_builder *b,
enum anv_internal_kernel_name shader_name);
void
genX(set_fast_clear_state)(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,

View File

@@ -25,7 +25,8 @@
#include "compiler/brw_compiler.h"
#include "compiler/brw_nir.h"
#include "compiler/spirv/nir_spirv.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "dev/intel_debug.h"
#include "util/macros.h"
@@ -33,78 +34,6 @@
#include "anv_internal_kernels.h"
#include "shaders/generated_draws_spv.h"
#include "shaders/query_copy_compute_spv.h"
#include "shaders/query_copy_fragment_spv.h"
#include "shaders/memcpy_compute_spv.h"
static bool
lower_vulkan_descriptors_instr(nir_builder *b, nir_intrinsic_instr *intrin,
void *cb_data)
{
if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
return false;
nir_instr *res_index_instr = intrin->src[0].ssa->parent_instr;
assert(res_index_instr->type == nir_instr_type_intrinsic);
nir_intrinsic_instr *res_index_intrin =
nir_instr_as_intrinsic(res_index_instr);
assert(res_index_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
b->cursor = nir_after_instr(&intrin->instr);
const struct anv_internal_kernel_bind_map *bind_map = cb_data;
uint32_t binding = nir_intrinsic_binding(res_index_intrin);
assert(binding < bind_map->num_bindings);
nir_def *desc_value = NULL;
if (bind_map->bindings[binding].push_constant) {
desc_value =
nir_vec2(b,
nir_imm_int(b, binding),
nir_imm_int(b, 0));
} else {
int push_constant_binding = -1;
for (uint32_t i = 0; i < bind_map->num_bindings; i++) {
if (bind_map->bindings[i].push_constant) {
push_constant_binding = i;
break;
}
}
assert(push_constant_binding != -1);
desc_value =
nir_load_ubo(b, 1, 64,
nir_imm_int(b, push_constant_binding),
nir_imm_int(b,
bind_map->bindings[binding].address_offset),
.align_mul = 8,
.align_offset = 0,
.range_base = 0,
.range = ~0);
desc_value =
nir_vec4(b,
nir_unpack_64_2x32_split_x(b, desc_value),
nir_unpack_64_2x32_split_y(b, desc_value),
nir_imm_int(b, 0),
nir_imm_int(b, 0));
}
nir_def_rewrite_uses(&intrin->def, desc_value);
return true;
}
static bool
lower_vulkan_descriptors(nir_shader *shader,
const struct anv_internal_kernel_bind_map *bind_map)
{
return nir_shader_intrinsics_pass(shader, lower_vulkan_descriptors_instr,
nir_metadata_block_index |
nir_metadata_dominance,
(void *)bind_map);
}
static bool
lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
UNUSED void *data)
@@ -117,60 +46,44 @@ lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
return true;
}
static bool
lower_load_ubo_to_uniforms(nir_builder *b, nir_intrinsic_instr *intrin,
void *cb_data)
static void
link_libanv(nir_shader *nir, const nir_shader *libanv)
{
if (intrin->intrinsic != nir_intrinsic_load_ubo)
return false;
b->cursor = nir_instr_remove(&intrin->instr);
nir_def_rewrite_uses(
&intrin->def,
nir_load_uniform(b,
intrin->def.num_components,
intrin->def.bit_size,
intrin->src[1].ssa,
.base = 0,
.range = intrin->def.num_components *
intrin->def.bit_size / 8));
return true;
nir_link_shader_functions(nir, libanv);
NIR_PASS_V(nir, nir_inline_functions);
NIR_PASS_V(nir, nir_remove_non_entrypoints);
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
glsl_get_cl_type_size_align);
NIR_PASS_V(nir, nir_opt_deref);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_lower_explicit_io,
nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
nir_var_mem_global,
nir_address_format_62bit_generic);
}
static struct anv_shader_bin *
compile_upload_spirv(struct anv_device *device,
gl_shader_stage stage,
const char *name,
const void *hash_key,
uint32_t hash_key_size,
const struct anv_internal_kernel_bind_map *bind_map,
const uint32_t *spirv_source,
uint32_t spirv_source_size,
uint32_t sends_count_expectation)
compile_shader(struct anv_device *device,
const nir_shader *libanv,
enum anv_internal_kernel_name shader_name,
gl_shader_stage stage,
const char *name,
const void *hash_key,
uint32_t hash_key_size,
uint32_t sends_count_expectation)
{
struct spirv_to_nir_options spirv_options = {
.caps = {
.int64 = true,
},
.ubo_addr_format = nir_address_format_32bit_index_offset,
.ssbo_addr_format = nir_address_format_64bit_global_32bit_offset,
.environment = NIR_SPIRV_VULKAN,
.create_library = false,
};
const nir_shader_compiler_options *nir_options =
device->physical->compiler->nir_options[stage];
nir_shader* nir =
vk_spirv_to_nir(&device->vk, spirv_source, spirv_source_size * 4,
stage, "main", 0, NULL, &spirv_options,
nir_options, true /* internal */,
NULL);
nir_builder b = nir_builder_init_simple_shader(stage, nir_options,
"%s", name);
assert(nir != NULL);
uint32_t uniform_size =
anv_genX(device->info, call_internal_shader)(&b, shader_name);
nir->info.name = ralloc_strdup(nir, name);
nir_shader *nir = b.shader;
link_libanv(nir, libanv);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_opt_cse);
@@ -182,6 +95,12 @@ compile_upload_spirv(struct anv_device *device,
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_split_per_member_structs);
if (stage == MESA_SHADER_COMPUTE) {
nir->info.workgroup_size[0] = 16;
nir->info.workgroup_size[1] = 1;
nir->info.workgroup_size[2] = 1;
}
struct brw_compiler *compiler = device->physical->compiler;
struct brw_nir_compiler_opts opts = {};
brw_preprocess_nir(compiler, nir, &opts);
@@ -205,25 +124,12 @@ compile_upload_spirv(struct anv_device *device,
nir_metadata_block_index | nir_metadata_dominance, NULL);
}
/* Reset sizes before gathering information */
nir->global_mem_size = 0;
nir->scratch_size = 0;
nir->info.shared_size = 0;
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
/* Do vectorizing here. For some reason when trying to do it in the backend
* this just isn't working.
*/
nir_load_store_vectorize_options options = {
.modes = nir_var_mem_ubo | nir_var_mem_ssbo,
.callback = brw_nir_should_vectorize_mem,
.robust_modes = (nir_variable_mode)0,
};
NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
NIR_PASS_V(nir, lower_vulkan_descriptors, bind_map);
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
nir_address_format_32bit_index_offset);
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
nir_address_format_64bit_global_32bit_offset);
NIR_PASS_V(nir, nir_copy_prop);
NIR_PASS_V(nir, nir_opt_constant_folding);
NIR_PASS_V(nir, nir_opt_dce);
@@ -235,14 +141,22 @@ compile_upload_spirv(struct anv_device *device,
memset(&prog_data, 0, sizeof(prog_data));
if (stage == MESA_SHADER_COMPUTE) {
NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_load_ubo_to_uniforms,
nir_metadata_block_index | nir_metadata_dominance,
NULL);
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, device->info,
&prog_data.cs);
nir->num_uniforms = bind_map->push_data_size;
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics,
device->info, &prog_data.cs);
}
/* Do vectorizing here. For some reason when trying to do it in the backend
* this just isn't working.
*/
nir_load_store_vectorize_options options = {
.modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global,
.callback = brw_nir_should_vectorize_mem,
.robust_modes = (nir_variable_mode)0,
};
NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
nir->num_uniforms = uniform_size;
prog_data.base.nr_params = nir->num_uniforms / 4;
brw_nir_analyze_ubo_ranges(compiler, nir, prog_data.base.ubo_ranges);
@@ -270,18 +184,21 @@ compile_upload_spirv(struct anv_device *device,
assert(stats[stat_idx].spills == 0);
assert(stats[stat_idx].fills == 0);
assert(stats[stat_idx].sends == sends_count_expectation);
assert(stats[stat_idx].spills == 0);
stat_idx++;
}
if (prog_data.wm.dispatch_16) {
assert(stats[stat_idx].spills == 0);
assert(stats[stat_idx].fills == 0);
assert(stats[stat_idx].sends == sends_count_expectation);
assert(stats[stat_idx].spills == 0);
stat_idx++;
}
if (prog_data.wm.dispatch_32) {
assert(stats[stat_idx].spills == 0);
assert(stats[stat_idx].fills == 0);
assert(stats[stat_idx].sends == sends_count_expectation * 2);
assert(stats[stat_idx].spills == 0);
stat_idx++;
}
} else {
@@ -304,6 +221,8 @@ compile_upload_spirv(struct anv_device *device,
assert(stats.sends == sends_count_expectation);
}
assert(prog_data.base.total_scratch == 0);
struct anv_pipeline_bind_map empty_bind_map = {};
struct anv_push_descriptor_info empty_push_desc_info = {};
struct anv_shader_upload_params upload_params = {
@@ -336,6 +255,11 @@ anv_device_init_internal_kernels(struct anv_device *device)
false /* needs_slm */);
device->internal_kernels_l3_config = intel_get_l3_config(device->info, w);
void *mem_ctx = ralloc_context(NULL);
nir_shader *libanv_shaders =
anv_genX(device->info, load_libanv_shader)(device, mem_ctx);
const struct {
struct {
char name[40];
@@ -343,134 +267,54 @@ anv_device_init_internal_kernels(struct anv_device *device)
gl_shader_stage stage;
const uint32_t *spirv_data;
uint32_t spirv_size;
uint32_t send_count;
struct anv_internal_kernel_bind_map bind_map;
} internal_kernels[] = {
[ANV_INTERNAL_KERNEL_GENERATED_DRAWS] = {
.key = {
.name = "anv-generated-indirect-draws",
},
.stage = MESA_SHADER_FRAGMENT,
.spirv_data = generated_draws_spv_source,
.spirv_size = ARRAY_SIZE(generated_draws_spv_source),
.send_count = /* 2 * (2 loads + 3 stores) + ** gfx11 **
* 2 * (2 loads + 6 stores) + ** gfx9 **
* 1 load + 3 store
*/ 29,
.bind_map = {
.num_bindings = 5,
.bindings = {
{
.address_offset = offsetof(struct anv_generated_indirect_params,
indirect_data_addr),
},
{
.address_offset = offsetof(struct anv_generated_indirect_params,
generated_cmds_addr),
},
{
.address_offset = offsetof(struct anv_generated_indirect_params,
draw_ids_addr),
},
{
.address_offset = offsetof(struct anv_generated_indirect_params,
draw_count_addr),
},
{
.push_constant = true,
},
},
.push_data_size = sizeof(struct anv_generated_indirect_params),
},
.send_count = (device->info->ver == 9 ?
/* 1 load +
* 4 stores +
* 2 * (2 loads + 2 stores) +
* 3 stores
*/
16 :
/* 1 load +
* 2 * (2 loads + 3 stores) +
* 3 stores
*/
14),
},
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE] = {
.key = {
.name = "anv-copy-query-compute",
},
.stage = MESA_SHADER_COMPUTE,
.spirv_data = query_copy_compute_spv_source,
.spirv_size = ARRAY_SIZE(query_copy_compute_spv_source),
.send_count = device->info->verx10 >= 125 ?
9 /* 4 loads + 4 stores + 1 EOT */ :
8 /* 3 loads + 4 stores + 1 EOT */,
.bind_map = {
.num_bindings = 3,
.bindings = {
{
.address_offset = offsetof(struct anv_query_copy_params,
query_data_addr),
},
{
.address_offset = offsetof(struct anv_query_copy_params,
destination_addr),
},
{
.push_constant = true,
},
},
.push_data_size = sizeof(struct anv_query_copy_params),
},
},
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT] = {
.key = {
.name = "anv-copy-query-fragment",
},
.stage = MESA_SHADER_FRAGMENT,
.spirv_data = query_copy_fragment_spv_source,
.spirv_size = ARRAY_SIZE(query_copy_fragment_spv_source),
.send_count = 8 /* 3 loads + 4 stores + 1 EOT */,
.bind_map = {
.num_bindings = 3,
.bindings = {
{
.address_offset = offsetof(struct anv_query_copy_params,
query_data_addr),
},
{
.address_offset = offsetof(struct anv_query_copy_params,
destination_addr),
},
{
.push_constant = true,
},
},
.push_data_size = sizeof(struct anv_query_copy_params),
},
},
[ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE] = {
.key = {
.name = "anv-memcpy-compute",
},
.stage = MESA_SHADER_COMPUTE,
.spirv_data = memcpy_compute_spv_source,
.spirv_size = ARRAY_SIZE(memcpy_compute_spv_source),
.send_count = device->info->verx10 >= 125 ?
10 /* 5 loads (1 pull constants) + 4 stores + 1 EOT */ :
9 /* 4 loads + 4 stores + 1 EOT */,
.bind_map = {
.num_bindings = 3,
.bindings = {
{
.address_offset = offsetof(struct anv_memcpy_params,
src_addr),
},
{
.address_offset = offsetof(struct anv_memcpy_params,
dst_addr),
},
{
.push_constant = true,
},
},
.push_data_size = sizeof(struct anv_memcpy_params),
},
},
};
VkResult result = VK_SUCCESS;
for (uint32_t i = 0; i < ARRAY_SIZE(internal_kernels); i++) {
device->internal_kernels[i] =
anv_device_search_for_kernel(device,
@@ -480,18 +324,19 @@ anv_device_init_internal_kernels(struct anv_device *device)
NULL);
if (device->internal_kernels[i] == NULL) {
device->internal_kernels[i] =
compile_upload_spirv(device,
internal_kernels[i].stage,
internal_kernels[i].key.name,
&internal_kernels[i].key,
sizeof(internal_kernels[i].key),
&internal_kernels[i].bind_map,
internal_kernels[i].spirv_data,
internal_kernels[i].spirv_size,
internal_kernels[i].send_count);
compile_shader(device,
libanv_shaders,
i,
internal_kernels[i].stage,
internal_kernels[i].key.name,
&internal_kernels[i].key,
sizeof(internal_kernels[i].key),
internal_kernels[i].send_count);
}
if (device->internal_kernels[i] == NULL) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto error;
}
if (device->internal_kernels[i] == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
/* The cache already has a reference and it's not going anywhere so
* there is no need to hold a second reference.
@@ -499,7 +344,8 @@ anv_device_init_internal_kernels(struct anv_device *device)
anv_shader_bin_unref(device, device->internal_kernels[i]);
}
return VK_SUCCESS;
error:
return result;
}
void

View File

@@ -24,21 +24,51 @@
#ifndef ANV_GENERATED_INDIRECT_DRAWS_H
#define ANV_GENERATED_INDIRECT_DRAWS_H
#include "shaders/interface.h"
#include "libintel_shaders.h"
struct PACKED anv_generated_indirect_params {
struct anv_generated_indirect_draw_params draw;
struct PACKED anv_gen_indirect_params {
/* Draw ID buffer address (only used on Gfx9) */
uint64_t draw_id_addr;
/* Global address of binding 0 */
/* Indirect data buffer address (only used on Gfx9) */
uint64_t indirect_data_addr;
/* Global address of binding 1 */
/* Stride between each element of the indirect data buffer */
uint32_t indirect_data_stride;
uint32_t flags; /* 0-7: bits, 8-15: mocs, 16-23: cmd_dws */
/* Base draw ID; it is added to the index computed from gl_FragCoord */
uint32_t draw_base;
/* Maximum number of draws (equal to draw_count for indirect draws without
* an indirect count)
*/
uint32_t max_draw_count;
/* Number of draws to generate in the ring buffer (only useful in ring
* buffer mode)
*/
uint32_t ring_count;
/* Instance multiplier for multi view */
uint32_t instance_multiplier;
/* Address to jump to in order to generate further draws (used in ring
* mode)
*/
uint64_t gen_addr;
/* Address to jump to after the generated draws (only used with indirect
* draw count variants)
*/
uint64_t end_addr;
/* Destination of the generated draw commands */
uint64_t generated_cmds_addr;
/* Global address of binding 2 */
uint64_t draw_ids_addr;
/* Global address of binding 3 (points to the draw_count field above) */
/* Draw count address (in some cases points to the draw_count field below) */
uint64_t draw_count_addr;
/* Draw count value for non-count variants of draw indirect commands */
@@ -48,31 +78,53 @@ struct PACKED anv_generated_indirect_params {
* split into smaller chunks, see while loop in
* genX(cmd_buffer_emit_indirect_generated_draws)
*/
struct anv_generated_indirect_params *prev;
struct anv_gen_indirect_params *prev;
};
struct PACKED anv_query_copy_params {
struct anv_query_copy_shader_params copy;
/* ANV_COPY_QUERY_FLAG_* flags */
uint32_t flags;
/* Number of queries to copy */
uint32_t num_queries;
/* Number of items to write back in the results per query */
uint32_t num_items;
/* First query to copy result from */
uint32_t query_base;
/* Query stride in bytes */
uint32_t query_stride;
/* Offset at which the data should be read from */
uint32_t query_data_offset;
/* Stride of destination writes */
uint32_t destination_stride;
/* We need to be 64-bit aligned, or 32-bit builds get
* very unhappy.
*/
uint32_t padding;
/* Address of the query pool */
uint64_t query_data_addr;
/* Destination address of the results */
uint64_t destination_addr;
};
/* This needs to match memcpy_compute.glsl :
*
* layout(set = 0, binding = 2) uniform block
*/
struct PACKED anv_memcpy_shader_params {
uint32_t num_dwords;
uint32_t pad;
};
struct PACKED anv_memcpy_params {
struct anv_memcpy_shader_params copy;
/* Number of dwords to copy */
uint32_t num_dwords;
uint32_t pad;
/* Source address of the copy */
uint64_t src_addr;
/* Destination address of the copy */
uint64_t dst_addr;
};
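A worked example of the packed flags layout noted above (values illustrative):
/* Indexed draw, MOCS 0x4, 10-dword draw command:
 *   flags = ANV_GENERATED_FLAG_INDEXED | (0x4 << 8) | (10 << 16);
 * decoded on the GPU side (see generated_draws.cl):
 *   mocs   = (flags >> 8) & 0xff;          -> 0x4
 *   size_B = ((flags >> 16) & 0xff) * 4;   -> 40 bytes
 */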

View File

@@ -1635,20 +1635,6 @@ enum anv_internal_kernel_name {
ANV_INTERNAL_KERNEL_COUNT,
};
struct anv_internal_kernel_bind_map {
uint32_t num_bindings;
struct {
/* Whether this binding is provided through push constants */
bool push_constant;
/* When not provided by push constants, this is offset at which the
* 64bit address of the binding is located in the push constant data.
*/
uint32_t address_offset;
} bindings[5];
uint32_t push_data_size;
};
enum anv_rt_bvh_build_method {
ANV_BVH_BUILD_METHOD_TRIVIAL,
ANV_BVH_BUILD_METHOD_NEW_SAH,

View File

@@ -138,15 +138,13 @@ anv_device_utrace_emit_cs_copy_ts_buffer(struct u_trace_context *utctx,
struct anv_memcpy_params *params = push_data_state.map;
*params = (struct anv_memcpy_params) {
.copy = {
.num_dwords = count * sizeof(union anv_utrace_timestamp) / 4,
},
.src_addr = anv_address_physical(from_addr),
.dst_addr = anv_address_physical(to_addr),
.num_dwords = count * sizeof(union anv_utrace_timestamp) / 4,
.src_addr = anv_address_physical(from_addr),
.dst_addr = anv_address_physical(to_addr),
};
anv_genX(device->info, emit_simple_shader_dispatch)(
&submit->simple_state, DIV_ROUND_UP(params->copy.num_dwords, 4),
&submit->simple_state, DIV_ROUND_UP(params->num_dwords, 4),
push_data_state);
}

View File

@@ -60,7 +60,7 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
struct anv_state push_data_state =
genX(simple_shader_alloc_push)(simple_state,
sizeof(struct anv_generated_indirect_params));
sizeof(struct anv_gen_indirect_params));
if (push_data_state.map == NULL)
return ANV_STATE_NULL;
@@ -73,42 +73,37 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
if (anv_address_is_null(count_addr)) {
draw_count_addr = anv_address_add(
genX(simple_shader_push_state_address)(simple_state, push_data_state),
offsetof(struct anv_generated_indirect_params, draw_count));
offsetof(struct anv_gen_indirect_params, draw_count));
} else {
draw_count_addr = count_addr;
}
struct anv_generated_indirect_params *push_data = push_data_state.map;
*push_data = (struct anv_generated_indirect_params) {
.draw = {
.draw_id_addr = anv_address_physical(draw_id_addr),
.indirect_data_addr = anv_address_physical(indirect_data_addr),
.indirect_data_stride = indirect_data_stride,
.flags = (use_tbimr ? ANV_GENERATED_FLAG_TBIMR : 0) |
(indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
(cmd_buffer->state.conditional_render_enabled ?
ANV_GENERATED_FLAG_PREDICATED : 0) |
((vs_prog_data->uses_firstvertex ||
vs_prog_data->uses_baseinstance) ?
ANV_GENERATED_FLAG_BASE : 0) |
(vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) |
(anv_mocs(device, indirect_data_addr.bo,
ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
(!anv_address_is_null(count_addr) ?
ANV_GENERATED_FLAG_COUNT : 0) |
(ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0) |
((generated_cmd_stride / 4) << 16) |
device->info->ver << 24,
.draw_base = item_base,
.max_draw_count = max_count,
.ring_count = ring_count,
.instance_multiplier = pipeline->instance_multiplier,
},
.draw_count = anv_address_is_null(count_addr) ? max_count : 0,
.indirect_data_addr = anv_address_physical(indirect_data_addr),
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
.draw_ids_addr = anv_address_physical(draw_id_addr),
.draw_count_addr = anv_address_physical(draw_count_addr),
struct anv_gen_indirect_params *push_data = push_data_state.map;
*push_data = (struct anv_gen_indirect_params) {
.draw_id_addr = anv_address_physical(draw_id_addr),
.indirect_data_addr = anv_address_physical(indirect_data_addr),
.indirect_data_stride = indirect_data_stride,
.flags = (use_tbimr ? ANV_GENERATED_FLAG_TBIMR : 0) |
(indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
(cmd_buffer->state.conditional_render_enabled ?
ANV_GENERATED_FLAG_PREDICATED : 0) |
((vs_prog_data->uses_firstvertex ||
vs_prog_data->uses_baseinstance) ?
ANV_GENERATED_FLAG_BASE : 0) |
(vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) |
(anv_mocs(device, indirect_data_addr.bo,
ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
(!anv_address_is_null(count_addr) ?
ANV_GENERATED_FLAG_COUNT : 0) |
(ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0) |
((generated_cmd_stride / 4) << 16),
.draw_base = item_base,
.max_draw_count = max_count,
.ring_count = ring_count,
.instance_multiplier = pipeline->instance_multiplier,
.draw_count = anv_address_is_null(count_addr) ? max_count : 0,
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
.draw_count_addr = anv_address_physical(draw_count_addr),
};
genX(emit_simple_shader_dispatch)(simple_state, item_count, push_data_state);
@@ -212,7 +207,7 @@ genX(cmd_buffer_get_generated_draw_stride)(struct anv_cmd_buffer *cmd_buffer)
static void
genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
struct anv_generated_indirect_params *params)
struct anv_gen_indirect_params *params)
{
/* We don't know the end_addr until we have emitted all the generation
* draws. Go and edit the address of all the push parameters.
@@ -220,7 +215,7 @@ genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
uint64_t end_addr =
anv_address_physical(anv_batch_current_address(&cmd_buffer->batch));
while (params != NULL) {
params->draw.end_addr = end_addr;
params->end_addr = end_addr;
params = params->prev;
}
}
@@ -295,7 +290,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inplace)(struct anv_cmd_buffer *cm
const uint32_t draw_cmd_stride =
genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);
struct anv_generated_indirect_params *last_params = NULL;
struct anv_gen_indirect_params *last_params = NULL;
uint32_t item_base = 0;
while (item_base < max_draw_count) {
const uint32_t item_count = MIN2(max_draw_count - item_base,
@@ -329,7 +324,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inplace)(struct anv_cmd_buffer *cm
max_draw_count,
indexed,
0 /* ring_count */);
struct anv_generated_indirect_params *params = params_state.map;
struct anv_gen_indirect_params *params = params_state.map;
if (params == NULL)
return;
@@ -498,7 +493,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
max_draw_count,
indexed,
ring_count);
struct anv_generated_indirect_params *params = params_state.map;
struct anv_gen_indirect_params *params = params_state.map;
anv_add_pending_pipe_bits(cmd_buffer,
#if GFX_VER == 9
@@ -561,7 +556,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
struct anv_address draw_base_addr = anv_address_add(
genX(simple_shader_push_state_address)(
&simple_state, params_state),
offsetof(struct anv_generated_indirect_params, draw.draw_base));
offsetof(struct anv_gen_indirect_params, draw_base));
const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device,
&draw_base_addr);
@@ -594,8 +589,8 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,
"after generated draws end");
params->draw.gen_addr = anv_address_physical(inc_addr);
params->draw.end_addr = anv_address_physical(end_addr);
params->gen_addr = anv_address_physical(inc_addr);
params->end_addr = anv_address_physical(end_addr);
}
}

View File

@@ -0,0 +1,111 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "anv_private.h"
#include "anv_internal_kernels.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#if GFX_VERx10 == 90
# include "intel_gfx9_shaders_code.h"
#elif GFX_VERx10 == 110
# include "intel_gfx11_shaders_code.h"
#elif GFX_VERx10 == 120
# include "intel_gfx12_shaders_code.h"
#elif GFX_VERx10 == 125
# include "intel_gfx125_shaders_code.h"
#elif GFX_VERx10 == 200
# include "intel_gfx20_shaders_code.h"
#else
# error "Unsupported generation"
#endif
#include "genxml/gen_macros.h"
#define load_param(b, bit_size, struct_name, field_name) \
nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0), \
.base = offsetof(struct_name, field_name), \
.range = bit_size / 8)
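A worked expansion of the macro above, using one of the calls below:
/* load_param(b, 64, struct anv_memcpy_params, src_addr) becomes:
 *   nir_load_uniform(b, 1, 64, nir_imm_int(b, 0),
 *                    .base = offsetof(struct anv_memcpy_params, src_addr),
 *                    .range = 8);
 * i.e. each parameter is read from the push-constant data at its
 * struct offset.
 */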
static nir_def *
load_fragment_index(nir_builder *b)
{
nir_def *pos_in = nir_f2i32(b, nir_trim_vector(b, nir_load_frag_coord(b), 2));
return nir_iadd(b,
nir_imul_imm(b, nir_channel(b, pos_in, 1), 8192),
nir_channel(b, pos_in, 0));
}
static nir_def *
load_compute_index(nir_builder *b)
{
return nir_channel(b, nir_load_global_invocation_id_zero_base(b, 32), 0);
}
nir_shader *
genX(load_libanv_shader)(struct anv_device *device, void *mem_ctx)
{
const nir_shader_compiler_options *nir_options =
device->physical->compiler->nir_options[MESA_SHADER_KERNEL];
struct blob_reader blob;
blob_reader_init(&blob, (void *)genX(intel_shaders_nir),
sizeof(genX(intel_shaders_nir)));
return nir_deserialize(mem_ctx, nir_options, &blob);
}
uint32_t
genX(call_internal_shader)(nir_builder *b, enum anv_internal_kernel_name shader_name)
{
switch (shader_name) {
case ANV_INTERNAL_KERNEL_GENERATED_DRAWS:
genX(libanv_write_draw)(
b,
load_param(b, 64, struct anv_gen_indirect_params, generated_cmds_addr),
load_param(b, 64, struct anv_gen_indirect_params, indirect_data_addr),
load_param(b, 64, struct anv_gen_indirect_params, draw_id_addr),
load_param(b, 32, struct anv_gen_indirect_params, indirect_data_stride),
load_param(b, 64, struct anv_gen_indirect_params, draw_count_addr),
load_param(b, 32, struct anv_gen_indirect_params, draw_base),
load_param(b, 32, struct anv_gen_indirect_params, instance_multiplier),
load_param(b, 32, struct anv_gen_indirect_params, max_draw_count),
load_param(b, 32, struct anv_gen_indirect_params, flags),
load_param(b, 32, struct anv_gen_indirect_params, ring_count),
load_param(b, 64, struct anv_gen_indirect_params, gen_addr),
load_param(b, 64, struct anv_gen_indirect_params, end_addr),
load_fragment_index(b));
return sizeof(struct anv_gen_indirect_params);
case ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE:
case ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT:
genX(libanv_query_copy)(
b,
load_param(b, 64, struct anv_query_copy_params, destination_addr),
load_param(b, 32, struct anv_query_copy_params, destination_stride),
load_param(b, 64, struct anv_query_copy_params, query_data_addr),
load_param(b, 32, struct anv_query_copy_params, query_base),
load_param(b, 32, struct anv_query_copy_params, num_queries),
load_param(b, 32, struct anv_query_copy_params, query_data_offset),
load_param(b, 32, struct anv_query_copy_params, query_stride),
load_param(b, 32, struct anv_query_copy_params, num_items),
load_param(b, 32, struct anv_query_copy_params, flags),
shader_name == ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE ?
load_compute_index(b) : load_fragment_index(b));
return sizeof(struct anv_query_copy_params);
case ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE:
genX(libanv_memcpy)(
b,
load_param(b, 64, struct anv_memcpy_params, dst_addr),
load_param(b, 64, struct anv_memcpy_params, src_addr),
load_param(b, 32, struct anv_memcpy_params, num_dwords),
nir_imul_imm(b, load_compute_index(b), 4));
return sizeof(struct anv_memcpy_params);
default:
unreachable("Invalid shader name");
break;
}
}

View File

@@ -1799,20 +1799,18 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
}
*params = (struct anv_query_copy_params) {
.copy = {
.flags = copy_flags,
.num_queries = query_count,
.num_items = num_items,
.query_base = first_query,
.query_stride = pool->stride,
.query_data_offset = data_offset,
.destination_stride = dest_stride,
},
.query_data_addr = anv_address_physical(
.flags = copy_flags,
.num_queries = query_count,
.num_items = num_items,
.query_base = first_query,
.query_stride = pool->stride,
.query_data_offset = data_offset,
.destination_stride = dest_stride,
.query_data_addr = anv_address_physical(
(struct anv_address) {
.bo = pool->bo,
}),
.destination_addr = anv_address_physical(dest_addr),
.destination_addr = anv_address_physical(dest_addr),
};
genX(emit_simple_shader_dispatch)(&state, query_count, push_data_state);

View File

@@ -18,8 +18,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
subdir('shaders')
inc_anv = include_directories('.')
anv_flags = [
@@ -110,6 +108,7 @@ anv_per_hw_ver_files = files(
'genX_gfx_state.c',
'genX_gpu_memcpy.c',
'genX_init_state.c',
'genX_internal_kernels.c',
'genX_pipeline.c',
'genX_query.c',
'genX_simple_shader.c',
@@ -121,7 +120,7 @@ endif
foreach _gfx_ver : ['90', '110', '120', '125', '200']
libanv_per_hw_ver_libs += static_library(
'anv_per_hw_ver@0@'.format(_gfx_ver),
[anv_per_hw_ver_files, anv_entrypoints[0], anv_internal_spvs, ],
[anv_per_hw_ver_files, anv_entrypoints[0]],
include_directories : [
inc_include, inc_src, inc_intel,
],
@@ -131,7 +130,7 @@ foreach _gfx_ver : ['90', '110', '120', '125', '200']
dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml,
idep_vulkan_util_headers, idep_vulkan_wsi_headers,
idep_vulkan_runtime_headers, idep_intel_driver_ds_headers,
idep_grl,
idep_grl, idep_intel_shaders,
],
)
endforeach
@@ -207,6 +206,7 @@ anv_deps = [
idep_vulkan_util_headers,
idep_vulkan_runtime_headers,
idep_vulkan_wsi_headers,
idep_intel_shaders,
]
if with_platform_x11
@@ -234,8 +234,7 @@ libanv_common = static_library(
'anv_common',
[
libanv_files, anv_entrypoints, sha1_h,
gen_xml_pack, float64_spv_h,
anv_internal_spvs,
gen_xml_pack, intel_float64_spv_h,
],
include_directories : [
inc_include, inc_src, inc_intel,
@@ -298,7 +297,7 @@ if with_tests
dependencies : [
dep_thread, dep_dl, dep_m, anv_deps,
idep_nir, idep_vulkan_util, idep_vulkan_wsi, idep_vulkan_runtime,
idep_mesautil, idep_intel_dev,
idep_mesautil, idep_intel_dev, idep_intel_shaders,
],
c_args : anv_flags,
gnu_symbol_visibility : 'hidden',
@@ -328,6 +327,7 @@ if with_tests
idep_gtest, dep_libdrm, dep_thread, dep_m, dep_valgrind,
idep_vulkan_util, idep_vulkan_wsi_headers,
idep_vulkan_runtime, idep_intel_driver_ds, idep_intel_dev,
idep_intel_shaders,
],
include_directories : [
inc_include, inc_src, inc_intel,

View File

@@ -1,160 +0,0 @@
/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "interface.h"
/* All storage bindings will be accessed through A64 messages */
layout(set = 0, binding = 0, std430) buffer Storage0 {
uint indirect_data[];
};
layout(set = 0, binding = 1, std430) buffer Storage1 {
uint commands[];
};
layout(set = 0, binding = 2, std430) buffer Storage2 {
uint draw_ids[];
};
/* We're not using a uniform block for this because our compiler
* infrastructure relies on UBOs to be 32-bytes aligned so that we can push
* them into registers. This value can come directly from the indirect buffer
* given to indirect draw commands and the requirement there is 4-bytes
* alignment.
*
* Also use a prefix on the variable as a reminder to make a copy of it,
* avoiding unnecessary accesses.
*/
layout(set = 0, binding = 3) buffer Storage3 {
uint _draw_count;
};
/* This data will be provided through push constants. */
layout(set = 0, binding = 4) uniform block {
anv_generated_indirect_draw_params params;
};
void write_VERTEX_BUFFER_STATE(uint write_offset,
uint mocs,
uint buffer_idx,
uint64_t address,
uint size)
{
commands[write_offset + 0] = (0 << 0 | /* Buffer Pitch */
0 << 13 | /* Null Vertex Buffer */
1 << 14 | /* Address Modify Enable */
mocs << 16 | /* MOCS */
buffer_idx << 26); /* Vertex Buffer Index */
commands[write_offset + 1] = uint(address & 0xffffffff);
commands[write_offset + 2] = uint(address >> 32);
commands[write_offset + 3] = size;
}
void write_3DPRIMITIVE(uint write_offset,
bool is_predicated,
bool is_indexed,
uint vertex_count_per_instance,
uint start_vertex_location,
uint instance_count,
uint start_instance_location,
uint base_vertex_location)
{
commands[write_offset + 0] = (3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
3 << 24 | /* 3D Command Opcode */
uint(is_predicated) << 8 |
5 << 0); /* DWord Length */
commands[write_offset + 1] = uint(is_indexed) << 8;
commands[write_offset + 2] = vertex_count_per_instance;
commands[write_offset + 3] = start_vertex_location;
commands[write_offset + 4] = instance_count;
commands[write_offset + 5] = start_instance_location;
commands[write_offset + 6] = base_vertex_location;
}
void write_3DPRIMITIVE_EXTENDED(uint write_offset,
bool is_predicated,
bool is_indexed,
bool use_tbimr,
uint vertex_count_per_instance,
uint start_vertex_location,
uint instance_count,
uint start_instance_location,
uint base_vertex_location,
uint param_base_vertex,
uint param_base_instance,
uint param_draw_id)
{
commands[write_offset + 0] = (3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
3 << 24 | /* 3D Command Opcode */
uint(use_tbimr) << 13 |
1 << 11 | /* Extended Parameter Enable */
uint(is_predicated) << 8 |
8 << 0); /* DWord Length */
commands[write_offset + 1] = uint(is_indexed) << 8;
commands[write_offset + 2] = vertex_count_per_instance;
commands[write_offset + 3] = start_vertex_location;
commands[write_offset + 4] = instance_count;
commands[write_offset + 5] = start_instance_location;
commands[write_offset + 6] = base_vertex_location;
commands[write_offset + 7] = param_base_vertex;
commands[write_offset + 8] = param_base_instance;
commands[write_offset + 9] = param_draw_id;
}
void write_MI_BATCH_BUFFER_START(uint write_offset,
uint64_t addr)
{
commands[write_offset + 0] = (0 << 29 | /* Command Type */
49 << 23 | /* MI Command Opcode */
1 << 8 | /* Address Space Indicator (PPGTT) */
1 << 0); /* DWord Length */
commands[write_offset + 1] = uint(addr & 0xffffffff);
commands[write_offset + 2] = uint(addr >> 32);
}
void end_generated_draws(uint item_idx, uint cmd_idx, uint draw_id, uint draw_count)
{
uint _3dprim_dw_size = (params.flags >> 16) & 0xff;
bool indirect_count = (params.flags & ANV_GENERATED_FLAG_COUNT) != 0;
bool ring_mode = (params.flags & ANV_GENERATED_FLAG_RING_MODE) != 0;
/* We can have an indirect draw count = 0. */
uint last_draw_id = draw_count == 0 ? 0 : (min(draw_count, params.max_draw_count) - 1);
uint jump_offset = draw_count == 0 ? 0 : _3dprim_dw_size;
if (ring_mode) {
if (draw_id == last_draw_id) {
/* Exit the ring buffer to the next user commands */
write_MI_BATCH_BUFFER_START(cmd_idx + jump_offset, params.end_addr);
} else if (item_idx == (params.ring_count - 1)) {
/* Jump back to the generation shader to generate more draws */
write_MI_BATCH_BUFFER_START(cmd_idx + jump_offset, params.gen_addr);
}
} else {
if (draw_id == last_draw_id && draw_count < params.max_draw_count) {
/* Skip forward to the end of the generated draws */
write_MI_BATCH_BUFFER_START(cmd_idx + jump_offset, params.end_addr);
}
}
}

View File

@@ -1,105 +0,0 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "interface.h"
/* These 3 bindings will be accessed through A64 messages */
layout(set = 0, binding = 0, std430) buffer Storage0 {
uint query_data[];
};
layout(set = 0, binding = 1, std430) buffer Storage1 {
uint destination[];
};
/* This data will be provided through push constants. */
layout(set = 0, binding = 2) uniform block {
anv_query_copy_shader_params params;
};
void query_copy(uint item_idx)
{
if (item_idx >= params.num_queries)
return;
bool is_result64 = (params.flags & ANV_COPY_QUERY_FLAG_RESULT64) != 0;
bool write_available = (params.flags & ANV_COPY_QUERY_FLAG_AVAILABLE) != 0;
bool compute_delta = (params.flags & ANV_COPY_QUERY_FLAG_DELTA) != 0;
bool partial_result = (params.flags & ANV_COPY_QUERY_FLAG_PARTIAL) != 0;
uint query_byte = (params.query_base + item_idx) * params.query_stride;
uint query_data_byte = query_byte + params.query_data_offset;
uint destination_byte = item_idx * params.destination_stride;
uint64_t availability = query_data[query_byte / 4];
uint query_data_dword = query_data_byte / 4;
uint dest_dword = destination_byte / 4;
for (uint i = 0; i < params.num_items; i++) {
uint item_data_dword = query_data_dword + i * 2 * (compute_delta ? 2 : 1);
uint64_t v;
if (compute_delta) {
uint64_t v0 = uint64_t(query_data[item_data_dword + 0]) |
(uint64_t(query_data[item_data_dword + 1]) << 32);
uint64_t v1 = uint64_t(query_data[item_data_dword + 2]) |
(uint64_t(query_data[item_data_dword + 3]) << 32);
v = v1 - v0;
} else {
v = uint64_t(query_data[item_data_dword + 0]) |
(uint64_t(query_data[item_data_dword + 1]) << 32);
}
/* vkCmdCopyQueryPoolResults:
*
* "If VK_QUERY_RESULT_PARTIAL_BIT is set, then for any query that is
* unavailable, an intermediate result between zero and the final
* result value is written for that query."
*
* We write 0 for values that have not been written yet; we can't really
* provide any sensible value.
*/
if (partial_result && availability == 0)
v = 0;
if (is_result64) {
destination[dest_dword + 0] = uint(v & 0xffffffff);
destination[dest_dword + 1] = uint(v >> 32);
dest_dword += 2;
} else {
destination[dest_dword + 0] = uint(v & 0xffffffff);
dest_dword += 1;
}
}
if (write_available) {
if (is_result64) {
destination[dest_dword + 0] = uint(availability & 0xffffffff);
destination[dest_dword + 1] = uint(availability >> 32);
} else {
destination[dest_dword + 0] = uint(availability & 0xffffffff);
}
}
}

View File

@@ -1,201 +0,0 @@
/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_GOOGLE_include_directive : enable
#include "common_generated_draws.glsl"
void gfx11_write_draw(uint item_idx, uint cmd_idx, uint draw_id)
{
bool is_indexed = (params.flags & ANV_GENERATED_FLAG_INDEXED) != 0;
bool is_predicated = (params.flags & ANV_GENERATED_FLAG_PREDICATED) != 0;
bool use_tbimr = (params.flags & ANV_GENERATED_FLAG_TBIMR) != 0;
uint indirect_data_offset = draw_id * params.indirect_data_stride / 4;
if (is_indexed) {
/* Loading a VkDrawIndexedIndirectCommand */
uint index_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * params.instance_multiplier;
uint first_index = indirect_data[indirect_data_offset + 2];
uint vertex_offset = indirect_data[indirect_data_offset + 3];
uint first_instance = indirect_data[indirect_data_offset + 4];
write_3DPRIMITIVE_EXTENDED(cmd_idx,
is_predicated,
is_indexed,
use_tbimr,
index_count,
first_index,
instance_count,
first_instance,
vertex_offset,
vertex_offset,
first_instance,
draw_id);
} else {
/* Loading a VkDrawIndirectCommand structure */
uint vertex_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * params.instance_multiplier;
uint first_vertex = indirect_data[indirect_data_offset + 2];
uint first_instance = indirect_data[indirect_data_offset + 3];
write_3DPRIMITIVE_EXTENDED(cmd_idx,
is_predicated,
is_indexed,
use_tbimr,
vertex_count,
first_vertex,
instance_count,
first_instance,
0 /* base_vertex_location */,
first_vertex,
first_instance,
draw_id);
}
}
void gfx9_write_draw(uint item_idx, uint cmd_idx, uint draw_id)
{
bool is_indexed = (params.flags & ANV_GENERATED_FLAG_INDEXED) != 0;
bool is_predicated = (params.flags & ANV_GENERATED_FLAG_PREDICATED) != 0;
bool uses_base = (params.flags & ANV_GENERATED_FLAG_BASE) != 0;
bool uses_drawid = (params.flags & ANV_GENERATED_FLAG_DRAWID) != 0;
uint mocs = (params.flags >> 8) & 0xff;
uint indirect_data_offset = draw_id * params.indirect_data_stride / 4;
if (is_indexed) {
/* Loading a VkDrawIndexedIndirectCommand */
uint index_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * params.instance_multiplier;
uint first_index = indirect_data[indirect_data_offset + 2];
uint vertex_offset = indirect_data[indirect_data_offset + 3];
uint first_instance = indirect_data[indirect_data_offset + 4];
if (uses_base || uses_drawid) {
uint state_vertex_len =
1 + (uses_base ? 4 : 0) + (uses_drawid ? 4 : 0);
commands[cmd_idx] =
(3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
0 << 24 | /* 3D Command Opcode */
8 << 16 | /* 3D Command Sub Opcode */
(state_vertex_len - 2) << 0); /* DWord Length */
cmd_idx += 1;
if (uses_base) {
uint64_t indirect_draw_data_addr =
params.indirect_data_addr + item_idx * params.indirect_data_stride + 12;
write_VERTEX_BUFFER_STATE(cmd_idx,
mocs,
31,
indirect_draw_data_addr,
8);
cmd_idx += 4;
}
if (uses_drawid) {
uint64_t draw_idx_addr = params.draw_id_addr + 4 * item_idx;
draw_ids[item_idx] = draw_id;
write_VERTEX_BUFFER_STATE(cmd_idx,
mocs,
32,
draw_idx_addr,
4);
cmd_idx += 4;
}
}
write_3DPRIMITIVE(cmd_idx,
is_predicated,
is_indexed,
index_count,
first_index,
instance_count,
first_instance,
vertex_offset);
} else {
/* Loading a VkDrawIndirectCommand structure */
uint vertex_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * params.instance_multiplier;
uint first_vertex = indirect_data[indirect_data_offset + 2];
uint first_instance = indirect_data[indirect_data_offset + 3];
if (uses_base || uses_drawid) {
uint state_vertex_len =
1 + (uses_base ? 4 : 0) + (uses_drawid ? 4 : 0);
commands[cmd_idx] =
(3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
0 << 24 | /* 3D Command Opcode */
8 << 16 | /* 3D Command Sub Opcode */
(state_vertex_len - 2) << 0); /* DWord Length */
cmd_idx += 1;
if (uses_base) {
uint64_t indirect_draw_data_addr =
params.indirect_data_addr + item_idx * params.indirect_data_stride + 8;
write_VERTEX_BUFFER_STATE(cmd_idx,
mocs,
31,
indirect_draw_data_addr,
8);
cmd_idx += 4;
}
if (uses_drawid) {
uint64_t draw_idx_addr = params.draw_id_addr + 4 * item_idx;
draw_ids[item_idx] = draw_id;
write_VERTEX_BUFFER_STATE(cmd_idx,
mocs,
32,
draw_idx_addr,
4);
cmd_idx += 4;
}
}
write_3DPRIMITIVE(cmd_idx,
is_predicated,
is_indexed,
vertex_count,
first_vertex,
instance_count,
first_instance,
0 /* base_vertex_location */);
}
}
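
Both branches above hand-pack the 3DSTATE_VERTEX_BUFFERS header dword. A
minimal C sketch of the same encoding, naming the shifts per the inline
comments (the helper name is hypothetical):

#include <stdint.h>

static uint32_t
pack_3dstate_vertex_buffers_header(uint32_t state_vertex_len)
{
   return (3u << 29) |                    /* Command Type */
          (3u << 27) |                    /* Command SubType */
          (0u << 24) |                    /* 3D Command Opcode */
          (8u << 16) |                    /* 3D Command Sub Opcode */
          ((state_vertex_len - 2) << 0);  /* DWord Length */
}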
void main()
{
uint _3dprim_dw_size = (params.flags >> 16) & 0xff;
uint gfx_ver = (params.flags >> 24) & 0xff;
uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
uint cmd_idx = item_idx * _3dprim_dw_size;
uint draw_id = params.draw_base + item_idx;
uint draw_count = _draw_count;
if (draw_id < min(draw_count, params.max_draw_count)) {
if (gfx_ver == 9)
gfx9_write_draw(item_idx, cmd_idx, draw_id);
else
gfx11_write_draw(item_idx, cmd_idx, draw_id);
}
end_generated_draws(item_idx, cmd_idx, draw_id, draw_count);
}
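
The item_idx computation above implies the generation pass rasterizes an
area at most 8192 pixels wide, one fragment per draw. A sizing sketch under
that assumption (the helper name is hypothetical):

#include <stdint.h>

#define GEN_DRAWS_WIDTH 8192u

static void
generation_area(uint32_t max_draw_count, uint32_t *width, uint32_t *height)
{
   *width  = max_draw_count < GEN_DRAWS_WIDTH ? max_draw_count
                                              : GEN_DRAWS_WIDTH;
   *height = (max_draw_count + GEN_DRAWS_WIDTH - 1) / GEN_DRAWS_WIDTH;
}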

View File

@@ -1,121 +0,0 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef _SHADERS_INTERFACE_H_
#define _SHADERS_INTERFACE_H_
#ifdef _IN_SHADER_
#define BITFIELD_BIT(i) (1u << i)
#define uint32_t uint
#define PACKED
#else
#include "util/macros.h"
#include <stdint.h>
#endif
#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
/* Only used on Gfx9; means the pipeline is using gl_DrawID */
#define ANV_GENERATED_FLAG_DRAWID BITFIELD_BIT(2)
/* Only used on Gfx9; means the pipeline is using gl_BaseVertex or
* gl_BaseInstance
*/
#define ANV_GENERATED_FLAG_BASE BITFIELD_BIT(3)
/* Whether the count is indirect */
#define ANV_GENERATED_FLAG_COUNT BITFIELD_BIT(4)
/* Whether the generation shader writes to the ring buffer */
#define ANV_GENERATED_FLAG_RING_MODE BITFIELD_BIT(5)
/* Whether TBIMR tile-based rendering shall be enabled. */
#define ANV_GENERATED_FLAG_TBIMR BITFIELD_BIT(6)
struct PACKED anv_generated_indirect_draw_params {
/* Draw ID buffer address (only used on Gfx9) */
uint64_t draw_id_addr;
/* Indirect data buffer address (only used on Gfx9) */
uint64_t indirect_data_addr;
/* Stride between each element of the indirect data buffer */
uint32_t indirect_data_stride;
uint32_t flags; /* 0-7: bits, 8-15: mocs, 16-23: cmd_dws, 24-31: gfx_ver */
/* Base draw ID; it is added to the index computed from gl_FragCoord */
uint32_t draw_base;
/* Maximum number of draws (equal to draw_count for indirect draws without
* an indirect count)
*/
uint32_t max_draw_count;
/* Number of draws to generate in the ring buffer (only useful in ring
* buffer mode)
*/
uint32_t ring_count;
/* Instance multiplier for multiview */
uint32_t instance_multiplier;
/* Address to jump to in order to generate further draws (used in ring
* mode)
*/
uint64_t gen_addr;
/* Address to jump to after the generated draws (only used with the
* indirect draw count variants)
*/
uint64_t end_addr;
};
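
The shader side confirms the flags packing documented above: gfx9_write_draw()
reads the MOCS as (flags >> 8) & 0xff, and main() reads the 3DPRIMITIVE dword
size and the graphics version from bits 16-23 and 24-31. A hypothetical
host-side helper assembling the word:

#include <stdint.h>

static uint32_t
pack_generated_draw_flags(uint32_t flag_bits, uint32_t mocs,
                          uint32_t cmd_dws, uint32_t gfx_ver)
{
   return (flag_bits & 0xff) |
          ((mocs     & 0xff) << 8) |
          ((cmd_dws  & 0xff) << 16) |
          ((gfx_ver  & 0xff) << 24);
}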
#define ANV_COPY_QUERY_FLAG_RESULT64 BITFIELD_BIT(0)
#define ANV_COPY_QUERY_FLAG_AVAILABLE BITFIELD_BIT(1)
#define ANV_COPY_QUERY_FLAG_DELTA BITFIELD_BIT(2)
#define ANV_COPY_QUERY_FLAG_PARTIAL BITFIELD_BIT(3)
/* This needs to match common_query_copy.glsl:
*
* layout(set = 0, binding = 2) uniform block
*/
struct PACKED anv_query_copy_shader_params {
/* ANV_COPY_QUERY_FLAG_* flags */
uint32_t flags;
/* Number of queries to copy */
uint32_t num_queries;
/* Number of result items to write back per query */
uint32_t num_items;
/* First query to copy result from */
uint32_t query_base;
/* Query stride in bytes */
uint32_t query_stride;
/* Offset from which the query data should be read */
uint32_t query_data_offset;
/* Stride of destination writes */
uint32_t destination_stride;
/* The structure size needs to be 64-bit aligned, otherwise 32-bit builds
* get very unhappy.
*/
uint32_t padding;
};
#endif /* _SHADERS_INTERFACE_H_ */
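
A minimal usage sketch for the query-copy parameters, assuming a 64-bit
result copy with availability (all values are illustrative, not taken from
the driver):

struct anv_query_copy_shader_params params = {
   .flags              = ANV_COPY_QUERY_FLAG_RESULT64 |
                         ANV_COPY_QUERY_FLAG_AVAILABLE,
   .num_queries        = 4,
   .num_items          = 1,
   .query_base         = 0,
   .query_stride       = 16, /* 8 bytes of availability + 8 of data */
   .query_data_offset  = 8,  /* skip the availability value */
   .destination_stride = 16, /* 8 bytes of result + 8 of availability */
};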

View File

@@ -1,64 +0,0 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_GOOGLE_include_directive : enable
layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;
/* These 2 bindings will be accessed through A64 messages */
layout(set = 0, binding = 0, std430) buffer Storage0 {
uint src[];
};
layout(set = 0, binding = 1, std430) buffer Storage1 {
uint dst[];
};
/* This data will be provided through push constants. */
layout(set = 0, binding = 2) uniform block {
uint num_dwords;
uint pad;
};
void main()
{
uint idx = gl_GlobalInvocationID.x * 4;
/* Try to do the copies in a single message as much as possible. */
if (idx + 4 <= num_dwords) {
dst[idx + 0] = src[idx + 0];
dst[idx + 1] = src[idx + 1];
dst[idx + 2] = src[idx + 2];
dst[idx + 3] = src[idx + 3];
} else if (idx + 3 <= num_dwords) {
dst[idx + 0] = src[idx + 0];
dst[idx + 1] = src[idx + 1];
dst[idx + 2] = src[idx + 2];
} else if (idx + 2 <= num_dwords) {
dst[idx + 0] = src[idx + 0];
dst[idx + 1] = src[idx + 1];
} else if (idx + 1 <= num_dwords) {
dst[idx + 0] = src[idx + 0];
}
}
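
Each invocation copies up to 4 dwords and the workgroup is 16 invocations
wide, so one workgroup covers 64 dwords. A hypothetical dispatch-size
computation:

#include <stdint.h>

static uint32_t
memcpy_group_count(uint32_t num_dwords)
{
   /* 16 invocations x 4 dwords each, rounded up. */
   return (num_dwords + 63) / 64;
}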

View File

@@ -1,63 +0,0 @@
# Copyright © 2022 Intel Corporation
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
float64_spv_h = custom_target(
'float64_spv.h',
input : [glsl2spirv, float64_glsl_file],
output : 'float64_spv.h',
command : [
prog_python, '@INPUT@', '@OUTPUT@',
prog_glslang,
'--create-entry', 'main',
'--vn', 'float64_spv_source',
'--glsl-version', '450',
'-Olib',
]
)
anv_internal_shaders = [
[ 'generated_draws.glsl', 'frag' ],
[ 'query_copy_compute.glsl', 'comp' ],
[ 'query_copy_fragment.glsl', 'frag' ],
[ 'memcpy_compute.glsl', 'comp' ],
]
anv_internal_spvs = []
foreach item : anv_internal_shaders
f = item[0]
stage = item[1]
spv_filename = f.replace('.glsl', '_spv.h')
src_name = f.replace('.glsl', '_spv_source')
anv_internal_spvs += custom_target(
spv_filename,
input : [glsl2spirv, f, files('common_generated_draws.glsl',
'common_query_copy.glsl',
'interface.h')],
output : spv_filename,
command : [
prog_python, '@INPUT0@', '@INPUT1@', '@OUTPUT@',
prog_glslang,
'--vn', src_name,
'--glsl-version', '450',
'--stage', stage,
'-I' + meson.current_source_dir(),
'-D_IN_SHADER_=1',
])
endforeach

View File

@@ -1,35 +0,0 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_GOOGLE_include_directive : enable
#include "common_query_copy.glsl"
layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;
void main()
{
query_copy(gl_GlobalInvocationID.x);
}
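
With local_size_x = 16 and the early-out in query_copy(), the dispatch needs
ceil(num_queries / 16) workgroups. A sketch (the helper name is assumed):

#include <stdint.h>

static uint32_t
query_copy_group_count(uint32_t num_queries)
{
   return (num_queries + 15) / 16; /* one invocation per query */
}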

View File

@@ -1,33 +0,0 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_GOOGLE_include_directive : enable
#include "common_query_copy.glsl"
void main()
{
query_copy(uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x));
}