diff --git a/meson.build b/meson.build index b91d7bf3159..b93077ed837 100644 --- a/meson.build +++ b/meson.build @@ -298,7 +298,8 @@ with_any_broadcom = [ if ['x86_64'].contains(host_machine.cpu_family()) and \ get_option('intel-clc') != 'system' # Require intel-clc with Anv & Iris (for internal shaders) - with_intel_clc = get_option('intel-clc') == 'enabled' + with_intel_clc = get_option('intel-clc') == 'enabled' or \ + with_intel_vk else with_intel_clc = false endif diff --git a/src/intel/genxml/meson.build b/src/intel/genxml/meson.build index bb4dd4cf42e..5128ac0d252 100644 --- a/src/intel/genxml/meson.build +++ b/src/intel/genxml/meson.build @@ -119,16 +119,12 @@ endforeach genX_cl_included_symbols = [ # instructions - 'MI_BATCH_BUFFER_START', - '3DSTATE_CONSTANT_ALL', - '3DSTATE_CONSTANT_VS', - '3DSTATE_RASTER', '3DSTATE_INDEX_BUFFER', '3DSTATE_VERTEX_BUFFERS', '3DPRIMITIVE', '3DPRIMITIVE_EXTENDED', + 'MI_BATCH_BUFFER_START', # structures - '3DSTATE_CONSTANT_BODY', 'VERTEX_BUFFER_STATE', ] diff --git a/src/intel/meson.build b/src/intel/meson.build index eef1662a5f4..67c767b14d7 100644 --- a/src/intel/meson.build +++ b/src/intel/meson.build @@ -28,6 +28,7 @@ endif subdir('isl') subdir('common') subdir('compiler') +subdir('shaders') if with_intel_hasvk or with_intel_vk or with_gallium_crocus or with_gallium_iris or with_intel_tools subdir('perf') endif diff --git a/src/intel/shaders/generate.cl b/src/intel/shaders/generate.cl new file mode 100644 index 00000000000..4d256256635 --- /dev/null +++ b/src/intel/shaders/generate.cl @@ -0,0 +1,206 @@ +/* Copyright © 2023 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "libintel_shaders.h" + +void genX(write_3DSTATE_VERTEX_BUFFERS)(global void *dst_ptr, + uint32_t buffer_count) +{ + struct GENX(3DSTATE_VERTEX_BUFFERS) v = { + GENX(3DSTATE_VERTEX_BUFFERS_header), + }; + v.DWordLength = 1 + (buffer_count * 4) - + GENX(3DSTATE_VERTEX_BUFFERS_length_bias); + GENX(3DSTATE_VERTEX_BUFFERS_pack)(dst_ptr, &v); +} + +void genX(write_VERTEX_BUFFER_STATE)(global void *dst_ptr, + uint32_t mocs, + uint32_t buffer_idx, + uint64_t address, + uint32_t size, + uint32_t stride) +{ + bool buffer_null = address == 0; + struct GENX(VERTEX_BUFFER_STATE) v = { + .BufferPitch = stride, + .NullVertexBuffer = address == 0, + .AddressModifyEnable = true, + .MOCS = mocs, +#if GFX_VER >= 12 + .L3BypassDisable = true, +#endif + .VertexBufferIndex = buffer_idx, + .BufferStartingAddress = address, + .BufferSize = size, + }; + GENX(VERTEX_BUFFER_STATE_pack)(dst_ptr, &v); +} + +#if GFX_VER == 9 +void genX(write_3DPRIMITIVE)(global void *dst_ptr, + bool is_predicated, + bool is_indexed, + bool uses_tbimr, + uint32_t vertex_count_per_instance, + uint32_t start_vertex_location, + uint32_t instance_count, + uint32_t start_instance_location, + uint32_t base_vertex_location) +{ + struct GENX(3DPRIMITIVE) v = { + GENX(3DPRIMITIVE_header), +#if GFX_VERx10 >= 125 + .TBIMREnable = uses_tbimr, +#endif + .PredicateEnable = is_predicated, + .VertexAccessType = is_indexed ? RANDOM : SEQUENTIAL, + .VertexCountPerInstance = vertex_count_per_instance, + .StartVertexLocation = start_vertex_location, + .InstanceCount = instance_count, + .StartInstanceLocation = start_instance_location, + .BaseVertexLocation = base_vertex_location, + }; + GENX(3DPRIMITIVE_pack)(dst_ptr, &v); +} +#endif + +#if GFX_VER >= 11 +void genX(write_3DPRIMITIVE_EXTENDED)(global void *dst_ptr, + bool is_predicated, + bool is_indexed, + bool uses_tbimr, + uint32_t vertex_count_per_instance, + uint32_t start_vertex_location, + uint32_t instance_count, + uint32_t start_instance_location, + uint32_t base_vertex_location, + uint32_t param_base_vertex, + uint32_t param_base_instance, + uint32_t param_draw_id) +{ + struct GENX(3DPRIMITIVE_EXTENDED) v = { + GENX(3DPRIMITIVE_EXTENDED_header), +#if GFX_VERx10 >= 125 + .TBIMREnable = uses_tbimr, +#endif + .PredicateEnable = is_predicated, + .VertexAccessType = is_indexed ? RANDOM : SEQUENTIAL, + .VertexCountPerInstance = vertex_count_per_instance, + .StartVertexLocation = start_vertex_location, + .InstanceCount = instance_count, + .StartInstanceLocation = start_instance_location, + .BaseVertexLocation = base_vertex_location, + .ExtendedParameter0 = param_base_vertex, + .ExtendedParameter1 = param_base_instance, + .ExtendedParameter2 = param_draw_id, + }; + GENX(3DPRIMITIVE_EXTENDED_pack)(dst_ptr, &v); +} +#endif + +void genX(write_MI_BATCH_BUFFER_START)(global void *dst_ptr, uint64_t addr) +{ + struct GENX(MI_BATCH_BUFFER_START) v = { + GENX(MI_BATCH_BUFFER_START_header), + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = addr, + }; + GENX(MI_BATCH_BUFFER_START_pack)(dst_ptr, &v); +} + +void genX(write_draw)(global uint32_t *dst_ptr, + global void *indirect_ptr, + global uint32_t *draw_id_ptr, + uint32_t draw_id, + uint32_t instance_multiplier, + bool is_indexed, + bool is_predicated, + bool uses_tbimr, + bool uses_base, + bool uses_drawid, + uint32_t mocs) +{ +#if GFX_VER == 9 + if (uses_base || uses_drawid) { + uint32_t vertex_buffer_count = + (uses_base ? 1 : 0) + (uses_drawid ? 1 : 0); + genX(write_3DSTATE_VERTEX_BUFFERS)(dst_ptr, vertex_buffer_count); + dst_ptr += 1; /* GENX(3DSTATE_VERTEX_BUFFERS_length); */ + if (uses_base) { + uint64_t base_addr = (uint64_t)indirect_ptr + (is_indexed ? 12 : 8); + genX(write_VERTEX_BUFFER_STATE)(dst_ptr, mocs, 31, base_addr, 8, 0); + dst_ptr += GENX(VERTEX_BUFFER_STATE_length); + } + if (uses_drawid) { + *draw_id_ptr = draw_id; + genX(write_VERTEX_BUFFER_STATE)(dst_ptr, mocs, 32, + (uint64_t)draw_id_ptr, 4, 0); + dst_ptr += GENX(VERTEX_BUFFER_STATE_length); + } + } + + if (is_indexed) { + VkDrawIndexedIndirectCommand data = + *((global VkDrawIndexedIndirectCommand *)indirect_ptr); + + genX(write_3DPRIMITIVE)(dst_ptr, + is_predicated, + is_indexed, + uses_tbimr, + data.indexCount, + data.firstIndex, + data.instanceCount * instance_multiplier, + data.firstInstance, + data.vertexOffset); + } else { + VkDrawIndirectCommand data = + *((global VkDrawIndirectCommand *)indirect_ptr); + + genX(write_3DPRIMITIVE)(dst_ptr, + is_predicated, + is_indexed, + uses_tbimr, + data.vertexCount, + data.firstVertex, + data.instanceCount * instance_multiplier, + data.firstInstance, + 0 /* base_vertex_location */); + } +#else + if (is_indexed) { + VkDrawIndexedIndirectCommand data = + *((global VkDrawIndexedIndirectCommand *)indirect_ptr); + + genX(write_3DPRIMITIVE_EXTENDED)(dst_ptr, + is_predicated, + is_indexed, + uses_tbimr, + data.indexCount, + data.firstIndex, + data.instanceCount * instance_multiplier, + data.firstInstance, + data.vertexOffset, + data.vertexOffset, + data.firstInstance, + draw_id); + } else { + VkDrawIndirectCommand data = + *((global VkDrawIndirectCommand *)indirect_ptr); + + genX(write_3DPRIMITIVE_EXTENDED)(dst_ptr, + is_predicated, + is_indexed, + uses_tbimr, + data.vertexCount, + data.firstVertex, + data.instanceCount * instance_multiplier, + data.firstInstance, + 0 /* base_vertex_location */, + data.firstVertex, + data.firstInstance, + draw_id); + } +#endif +} diff --git a/src/intel/shaders/generate_draws.cl b/src/intel/shaders/generate_draws.cl new file mode 100644 index 00000000000..f5a29e555aa --- /dev/null +++ b/src/intel/shaders/generate_draws.cl @@ -0,0 +1,77 @@ +/* Copyright © 2023 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "libintel_shaders.h" + +static void end_generated_draws(global void *dst_ptr, + uint32_t item_idx, + uint32_t draw_id, uint32_t draw_count, + uint32_t ring_count, uint32_t max_draw_count, + uint32_t flags, + uint64_t gen_addr, uint64_t end_addr) +{ + uint32_t _3dprim_size_B = ((flags >> 16) & 0xff) * 4; + bool indirect_count = (flags & ANV_GENERATED_FLAG_COUNT) != 0; + bool ring_mode = (flags & ANV_GENERATED_FLAG_RING_MODE) != 0; + /* We can have an indirect draw count = 0. */ + uint32_t last_draw_id = draw_count == 0 ? 0 : (min(draw_count, max_draw_count) - 1); + global void *jump_dst = draw_count == 0 ? dst_ptr : (dst_ptr + _3dprim_size_B); + + if (ring_mode) { + if (draw_id == last_draw_id) { + /* Exit the ring buffer to the next user commands */ + genX(write_MI_BATCH_BUFFER_START)(jump_dst, end_addr); + } else if (item_idx == (ring_count - 1)) { + /* Jump back to the generation shader to generate mode draws */ + genX(write_MI_BATCH_BUFFER_START)(jump_dst, gen_addr); + } + } else { + if (draw_id == last_draw_id && draw_count < max_draw_count) { + /* Skip forward to the end of the generated draws */ + genX(write_MI_BATCH_BUFFER_START)(jump_dst, end_addr); + } + } +} + +void +genX(libanv_write_draw)(global void *dst_base, + global void *indirect_base, + global void *draw_id_base, + uint32_t indirect_stride, + global uint32_t *_draw_count, + uint32_t draw_base, + uint32_t instance_multiplier, + uint32_t max_draw_count, + uint32_t flags, + uint32_t ring_count, + uint64_t gen_addr, + uint64_t end_addr, + uint32_t item_idx) +{ + uint32_t _3dprim_size_B = ((flags >> 16) & 0xff) * 4; + uint32_t draw_id = draw_base + item_idx; + uint32_t draw_count = *_draw_count; + global void *dst_ptr = dst_base + item_idx * _3dprim_size_B; + global void *indirect_ptr = indirect_base + draw_id * indirect_stride; + global void *draw_id_ptr = draw_id_base + item_idx * 4; + + if (draw_id < min(draw_count, max_draw_count)) { + bool is_indexed = (flags & ANV_GENERATED_FLAG_INDEXED) != 0; + bool is_predicated = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0; + bool uses_tbimr = (flags & ANV_GENERATED_FLAG_TBIMR) != 0; + bool uses_base = (flags & ANV_GENERATED_FLAG_BASE) != 0; + bool uses_drawid = (flags & ANV_GENERATED_FLAG_DRAWID) != 0; + uint32_t mocs = (flags >> 8) & 0xff; + + genX(write_draw)(dst_ptr, indirect_ptr, draw_id_ptr, + draw_id, instance_multiplier, + is_indexed, is_predicated, + uses_tbimr, uses_base, uses_drawid, + mocs); + } + + end_generated_draws(dst_ptr, item_idx, draw_id, draw_count, + ring_count, max_draw_count, flags, + gen_addr, end_addr); +} diff --git a/src/intel/shaders/libintel_shaders.h b/src/intel/shaders/libintel_shaders.h new file mode 100644 index 00000000000..7d0144849a6 --- /dev/null +++ b/src/intel/shaders/libintel_shaders.h @@ -0,0 +1,127 @@ +/* Copyright © 2023 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#ifndef _LIBANV_SHADERS_H_ +#define _LIBANV_SHADERS_H_ + +/* Define stdint types compatible between the CPU and GPU for shared headers */ +#ifndef __OPENCL_VERSION__ +#include + +#include "util/macros.h" + +#else +#define BITFIELD_BIT(i) (1u << i) + +typedef ulong uint64_t; +typedef uint uint32_t; +typedef ushort uint16_t; +typedef uchar uint8_t; + +typedef long int64_t; +typedef int int32_t; +typedef short int16_t; +typedef char int8_t; + +typedef struct VkDrawIndexedIndirectCommand { + uint32_t indexCount; + uint32_t instanceCount; + uint32_t firstIndex; + int32_t vertexOffset; + uint32_t firstInstance; +} VkDrawIndexedIndirectCommand __attribute__((aligned(4))); + +typedef struct VkDrawIndirectCommand { + uint32_t vertexCount; + uint32_t instanceCount; + uint32_t firstVertex; + uint32_t firstInstance; +} VkDrawIndirectCommand __attribute__((aligned(4))); + +#include "genxml/gen_macros.h" +#include "genxml/genX_cl_pack.h" +#endif + +/** + * Flags for generated_draws.cl + */ +#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0) +#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1) +/* Only used on Gfx9, means the pipeline is using gl_DrawID */ +#define ANV_GENERATED_FLAG_DRAWID BITFIELD_BIT(2) +/* Only used on Gfx9, means the pipeline is using gl_BaseVertex or + * gl_BaseInstance + */ +#define ANV_GENERATED_FLAG_BASE BITFIELD_BIT(3) +/* Whether the count is indirect */ +#define ANV_GENERATED_FLAG_COUNT BITFIELD_BIT(4) +/* Whether the generation shader writes to the ring buffer */ +#define ANV_GENERATED_FLAG_RING_MODE BITFIELD_BIT(5) +/* Whether TBIMR tile-based rendering shall be enabled. */ +#define ANV_GENERATED_FLAG_TBIMR BITFIELD_BIT(6) + +/** + * Flags for query_copy.cl + */ +#define ANV_COPY_QUERY_FLAG_RESULT64 BITFIELD_BIT(0) +#define ANV_COPY_QUERY_FLAG_AVAILABLE BITFIELD_BIT(1) +#define ANV_COPY_QUERY_FLAG_DELTA BITFIELD_BIT(2) +#define ANV_COPY_QUERY_FLAG_PARTIAL BITFIELD_BIT(3) + +#ifdef __OPENCL_VERSION__ + +void genX(write_3DSTATE_VERTEX_BUFFERS)(global void *dst_ptr, + uint32_t buffer_count); + +void genX(write_VERTEX_BUFFER_STATE)(global void *dst_ptr, + uint32_t mocs, + uint32_t buffer_idx, + uint64_t address, + uint32_t size, + uint32_t stride); + +#if GFX_VER == 9 +void genX(write_3DPRIMITIVE)(global void *dst_ptr, + bool is_predicated, + bool is_indexed, + bool use_tbimr, + uint32_t vertex_count_per_instance, + uint32_t start_vertex_location, + uint32_t instance_count, + uint32_t start_instance_location, + uint32_t base_vertex_location); +#endif + +#if GFX_VER >= 11 +void genX(write_3DPRIMITIVE_EXTENDED)(global void *dst_ptr, + bool is_predicated, + bool is_indexed, + bool use_tbimr, + uint32_t vertex_count_per_instance, + uint32_t start_vertex_location, + uint32_t instance_count, + uint32_t start_instance_location, + uint32_t base_vertex_location, + uint32_t param_base_vertex, + uint32_t param_base_instance, + uint32_t param_draw_id); +#endif + +void genX(write_MI_BATCH_BUFFER_START)(global void *dst_ptr, uint64_t addr); + +void genX(write_draw)(global uint32_t *dst_ptr, + global void *indirect_ptr, + global uint32_t *draw_id_ptr, + uint32_t draw_id, + uint32_t instance_multiplier, + bool is_indexed, + bool is_predicated, + bool uses_tbimr, + bool uses_base, + bool uses_draw_id, + uint32_t mocs); + +#endif /* __OPENCL_VERSION__ */ + +#endif /* _LIBANV_SHADERS_H_ */ diff --git a/src/intel/shaders/memcpy.cl b/src/intel/shaders/memcpy.cl new file mode 100644 index 00000000000..51d0ed1e1eb --- /dev/null +++ b/src/intel/shaders/memcpy.cl @@ -0,0 +1,23 @@ +/* Copyright © 2023 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +void +genX(libanv_memcpy)(global void *dst_base, + global void *src_base, + uint num_dwords, + uint dword_offset) +{ + global void *dst = dst_base + 4 * dword_offset; + global void *src = src_base + 4 * dword_offset; + + if (dword_offset + 4 <= num_dwords) { + *(global uint4 *)(dst) = *(global uint4 *)(src); + } else if (dword_offset + 3 <= num_dwords) { + *(global uint3 *)(dst) = *(global uint3 *)(src); + } else if (dword_offset + 2 <= num_dwords) { + *(global uint2 *)(dst) = *(global uint2 *)(src); + } else if (dword_offset + 1 <= num_dwords) { + *(global uint *)(dst) = *(global uint *)(src); + } +} diff --git a/src/intel/shaders/meson.build b/src/intel/shaders/meson.build new file mode 100644 index 00000000000..dacf2c98f33 --- /dev/null +++ b/src/intel/shaders/meson.build @@ -0,0 +1,63 @@ +# Copyright © 2023 Intel Corporation +# SPDX-License-Identifier: MIT + +intel_float64_spv_h = custom_target( + 'float64_spv.h', + input : [glsl2spirv, float64_glsl_file], + output : 'float64_spv.h', + command : [ + prog_python, '@INPUT@', '@OUTPUT@', + prog_glslang, + '--create-entry', 'main', + '--vn', 'float64_spv_source', + '--glsl-version', '450', + '-Olib', + ] +) + +intel_shader_files = files( + 'libintel_shaders.h', + 'generate.cl', + 'generate_draws.cl', + 'memcpy.cl', + 'query_copy.cl', +) + +prepended_input_args = [] +foreach input_arg : intel_shader_files + prepended_input_args += ['--in', input_arg] +endforeach + +intel_shaders_gens = [ [ 90, 9], + [110, 11], + [120, 12], + [125, 125], + [200, 20] ] +intel_shaders = [] +foreach gen : intel_shaders_gens + intel_shaders += custom_target( + 'intel_gfx@0@_shaders_code.h'.format(gen[1]), + input : intel_shader_files, + output : 'intel_gfx@0@_shaders_code.h'.format(gen[1]), + command : [ + prog_intel_clc, '--nir', + '--prefix', 'gfx@0@_intel_shaders'.format(gen[1]), + prepended_input_args, '-o', '@OUTPUT@', '--', + '-cl-std=cl2.0', '-D__OPENCL_VERSION__=200', + '-DGFX_VERx10=@0@'.format(gen[0]), + '-I' + join_paths(meson.current_source_dir(), '.'), + '-I' + join_paths(meson.source_root(), 'src'), + '-I' + join_paths(meson.source_root(), 'src/intel'), + '-I' + join_paths(meson.build_root(), 'src/intel'), + '-I' + join_paths(meson.source_root(), 'src/intel/genxml'), + '-include', 'opencl-c.h', + ], + env: ['MESA_SHADER_CACHE_DISABLE=true'], + depends : [dep_prog_intel_clc, gen_cl_xml_pack], + ) +endforeach + +idep_intel_shaders = declare_dependency( + sources : intel_shaders, + include_directories : include_directories('.'), +) diff --git a/src/intel/shaders/query_copy.cl b/src/intel/shaders/query_copy.cl new file mode 100644 index 00000000000..a241fb1853f --- /dev/null +++ b/src/intel/shaders/query_copy.cl @@ -0,0 +1,72 @@ +/* Copyright © 2023 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +void +genX(libanv_query_copy)(global void *destination_base, + uint32_t destination_stride, + global void *query_data, + uint32_t first_query, + uint32_t num_queries, + uint32_t query_data_offset, + uint32_t query_stride, + uint32_t num_query_items, + uint32_t copy_flags, + uint32_t copy_item_idx) +{ + if (copy_item_idx >= num_queries) + return; + + bool is_result64 = (copy_flags & ANV_COPY_QUERY_FLAG_RESULT64) != 0; + bool write_available = (copy_flags & ANV_COPY_QUERY_FLAG_AVAILABLE) != 0; + bool compute_delta = (copy_flags & ANV_COPY_QUERY_FLAG_DELTA) != 0; + bool partial_result = (copy_flags & ANV_COPY_QUERY_FLAG_PARTIAL) != 0; + + + uint query_byte = (first_query + copy_item_idx) * query_stride; + uint query_data_byte = query_byte + query_data_offset; + + global uint64_t *query = query_data + (first_query + copy_item_idx) * query_stride; + global uint64_t *dest64 = destination_base + copy_item_idx * destination_stride; + global uint32_t *dest32 = destination_base + copy_item_idx * destination_stride; + + uint64_t availability = *(global uint32_t *)(query_data + query_byte); + + if (write_available) { + if (is_result64) + dest64[num_query_items] = availability; + else + dest32[num_query_items] = availability; + } + + for (uint32_t i = 0; i < num_query_items; i++) { + uint32_t qw_offset = 1 + i * 2; + uint64_t v; + if (compute_delta) { + struct delta64 { + uint64_t v0; + uint64_t v1; + } data = *((global struct delta64 *)&query[qw_offset]); + v = data.v1 - data.v0; + } else { + v = query[qw_offset + 0]; + } + + /* vkCmdCopyQueryPoolResults: + * + * "If VK_QUERY_RESULT_PARTIAL_BIT is set, then for any query that is + * unavailable, an intermediate result between zero and the final + * result value is written for that query." + * + * We write 0 as the values not being written yet, we can't really make + * provide any sensible value. + */ + if (partial_result && availability == 0) + v = 0; + + if (is_result64) + dest64[i] = v; + else + dest32[i] = v; + } +} diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index a6a53533431..0b81795946e 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -39,6 +39,9 @@ struct intel_sample_positions; struct intel_urb_config; +typedef struct nir_builder nir_builder; +typedef struct nir_shader nir_shader; + extern const uint32_t genX(vk_to_intel_cullmode)[]; extern const uint32_t genX(vk_to_intel_front_face)[]; @@ -59,6 +62,11 @@ VkResult genX(init_device_state)(struct anv_device *device); void genX(init_cps_device_state)(struct anv_device *device); +nir_shader *genX(load_libanv_shader)(struct anv_device *device, void *mem_ctx); + +uint32_t genX(call_internal_shader)(nir_builder *b, + enum anv_internal_kernel_name shader_name); + void genX(set_fast_clear_state)(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, diff --git a/src/intel/vulkan/anv_internal_kernels.c b/src/intel/vulkan/anv_internal_kernels.c index 02a856450d6..082f99b4eaf 100644 --- a/src/intel/vulkan/anv_internal_kernels.c +++ b/src/intel/vulkan/anv_internal_kernels.c @@ -25,7 +25,8 @@ #include "compiler/brw_compiler.h" #include "compiler/brw_nir.h" -#include "compiler/spirv/nir_spirv.h" +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" #include "dev/intel_debug.h" #include "util/macros.h" @@ -33,78 +34,6 @@ #include "anv_internal_kernels.h" -#include "shaders/generated_draws_spv.h" -#include "shaders/query_copy_compute_spv.h" -#include "shaders/query_copy_fragment_spv.h" -#include "shaders/memcpy_compute_spv.h" - -static bool -lower_vulkan_descriptors_instr(nir_builder *b, nir_intrinsic_instr *intrin, - void *cb_data) -{ - if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor) - return false; - - nir_instr *res_index_instr = intrin->src[0].ssa->parent_instr; - assert(res_index_instr->type == nir_instr_type_intrinsic); - nir_intrinsic_instr *res_index_intrin = - nir_instr_as_intrinsic(res_index_instr); - assert(res_index_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); - - b->cursor = nir_after_instr(&intrin->instr); - - const struct anv_internal_kernel_bind_map *bind_map = cb_data; - uint32_t binding = nir_intrinsic_binding(res_index_intrin); - assert(binding < bind_map->num_bindings); - - nir_def *desc_value = NULL; - if (bind_map->bindings[binding].push_constant) { - desc_value = - nir_vec2(b, - nir_imm_int(b, binding), - nir_imm_int(b, 0)); - } else { - int push_constant_binding = -1; - for (uint32_t i = 0; i < bind_map->num_bindings; i++) { - if (bind_map->bindings[i].push_constant) { - push_constant_binding = i; - break; - } - } - assert(push_constant_binding != -1); - - desc_value = - nir_load_ubo(b, 1, 64, - nir_imm_int(b, push_constant_binding), - nir_imm_int(b, - bind_map->bindings[binding].address_offset), - .align_mul = 8, - .align_offset = 0, - .range_base = 0, - .range = ~0); - desc_value = - nir_vec4(b, - nir_unpack_64_2x32_split_x(b, desc_value), - nir_unpack_64_2x32_split_y(b, desc_value), - nir_imm_int(b, 0), - nir_imm_int(b, 0)); - } - - nir_def_rewrite_uses(&intrin->def, desc_value); - - return true; -} - -static bool -lower_vulkan_descriptors(nir_shader *shader, - const struct anv_internal_kernel_bind_map *bind_map) -{ - return nir_shader_intrinsics_pass(shader, lower_vulkan_descriptors_instr, - nir_metadata_block_index | - nir_metadata_dominance, - (void *)bind_map); -} - static bool lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin, UNUSED void *data) @@ -117,60 +46,44 @@ lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin, return true; } -static bool -lower_load_ubo_to_uniforms(nir_builder *b, nir_intrinsic_instr *intrin, - void *cb_data) +static void +link_libanv(nir_shader *nir, const nir_shader *libanv) { - if (intrin->intrinsic != nir_intrinsic_load_ubo) - return false; - - b->cursor = nir_instr_remove(&intrin->instr); - - nir_def_rewrite_uses( - &intrin->def, - nir_load_uniform(b, - intrin->def.num_components, - intrin->def.bit_size, - intrin->src[1].ssa, - .base = 0, - .range = intrin->def.num_components * - intrin->def.bit_size / 8)); - - return true; + nir_link_shader_functions(nir, libanv); + NIR_PASS_V(nir, nir_inline_functions); + NIR_PASS_V(nir, nir_remove_non_entrypoints); + NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, + glsl_get_cl_type_size_align); + NIR_PASS_V(nir, nir_opt_deref); + NIR_PASS_V(nir, nir_lower_vars_to_ssa); + NIR_PASS_V(nir, nir_lower_explicit_io, + nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared | + nir_var_mem_global, + nir_address_format_62bit_generic); } static struct anv_shader_bin * -compile_upload_spirv(struct anv_device *device, - gl_shader_stage stage, - const char *name, - const void *hash_key, - uint32_t hash_key_size, - const struct anv_internal_kernel_bind_map *bind_map, - const uint32_t *spirv_source, - uint32_t spirv_source_size, - uint32_t sends_count_expectation) +compile_shader(struct anv_device *device, + const nir_shader *libanv, + enum anv_internal_kernel_name shader_name, + gl_shader_stage stage, + const char *name, + const void *hash_key, + uint32_t hash_key_size, + uint32_t sends_count_expectation) { - struct spirv_to_nir_options spirv_options = { - .caps = { - .int64 = true, - }, - .ubo_addr_format = nir_address_format_32bit_index_offset, - .ssbo_addr_format = nir_address_format_64bit_global_32bit_offset, - .environment = NIR_SPIRV_VULKAN, - .create_library = false, - }; const nir_shader_compiler_options *nir_options = device->physical->compiler->nir_options[stage]; - nir_shader* nir = - vk_spirv_to_nir(&device->vk, spirv_source, spirv_source_size * 4, - stage, "main", 0, NULL, &spirv_options, - nir_options, true /* internal */, - NULL); + nir_builder b = nir_builder_init_simple_shader(stage, nir_options, + "%s", name); - assert(nir != NULL); + uint32_t uniform_size = + anv_genX(device->info, call_internal_shader)(&b, shader_name); - nir->info.name = ralloc_strdup(nir, name); + nir_shader *nir = b.shader; + + link_libanv(nir, libanv); NIR_PASS_V(nir, nir_lower_vars_to_ssa); NIR_PASS_V(nir, nir_opt_cse); @@ -182,6 +95,12 @@ compile_upload_spirv(struct anv_device *device, NIR_PASS_V(nir, nir_split_var_copies); NIR_PASS_V(nir, nir_split_per_member_structs); + if (stage == MESA_SHADER_COMPUTE) { + nir->info.workgroup_size[0] = 16; + nir->info.workgroup_size[1] = 1; + nir->info.workgroup_size[2] = 1; + } + struct brw_compiler *compiler = device->physical->compiler; struct brw_nir_compiler_opts opts = {}; brw_preprocess_nir(compiler, nir, &opts); @@ -205,25 +124,12 @@ compile_upload_spirv(struct anv_device *device, nir_metadata_block_index | nir_metadata_dominance, NULL); } + /* Reset sizes before gathering information */ + nir->global_mem_size = 0; + nir->scratch_size = 0; + nir->info.shared_size = 0; nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - /* Do vectorizing here. For some reason when trying to do it in the back - * this just isn't working. - */ - nir_load_store_vectorize_options options = { - .modes = nir_var_mem_ubo | nir_var_mem_ssbo, - .callback = brw_nir_should_vectorize_mem, - .robust_modes = (nir_variable_mode)0, - }; - NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options); - - NIR_PASS_V(nir, lower_vulkan_descriptors, bind_map); - - NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo, - nir_address_format_32bit_index_offset); - NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo, - nir_address_format_64bit_global_32bit_offset); - NIR_PASS_V(nir, nir_copy_prop); NIR_PASS_V(nir, nir_opt_constant_folding); NIR_PASS_V(nir, nir_opt_dce); @@ -235,14 +141,22 @@ compile_upload_spirv(struct anv_device *device, memset(&prog_data, 0, sizeof(prog_data)); if (stage == MESA_SHADER_COMPUTE) { - NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_load_ubo_to_uniforms, - nir_metadata_block_index | nir_metadata_dominance, - NULL); - NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, device->info, - &prog_data.cs); - nir->num_uniforms = bind_map->push_data_size; + NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, + device->info, &prog_data.cs); } + /* Do vectorizing here. For some reason when trying to do it in the back + * this just isn't working. + */ + nir_load_store_vectorize_options options = { + .modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global, + .callback = brw_nir_should_vectorize_mem, + .robust_modes = (nir_variable_mode)0, + }; + NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options); + + nir->num_uniforms = uniform_size; + prog_data.base.nr_params = nir->num_uniforms / 4; brw_nir_analyze_ubo_ranges(compiler, nir, prog_data.base.ubo_ranges); @@ -270,18 +184,21 @@ compile_upload_spirv(struct anv_device *device, assert(stats[stat_idx].spills == 0); assert(stats[stat_idx].fills == 0); assert(stats[stat_idx].sends == sends_count_expectation); + assert(stats[stat_idx].spills == 0); stat_idx++; } if (prog_data.wm.dispatch_16) { assert(stats[stat_idx].spills == 0); assert(stats[stat_idx].fills == 0); assert(stats[stat_idx].sends == sends_count_expectation); + assert(stats[stat_idx].spills == 0); stat_idx++; } if (prog_data.wm.dispatch_32) { assert(stats[stat_idx].spills == 0); assert(stats[stat_idx].fills == 0); assert(stats[stat_idx].sends == sends_count_expectation * 2); + assert(stats[stat_idx].spills == 0); stat_idx++; } } else { @@ -304,6 +221,8 @@ compile_upload_spirv(struct anv_device *device, assert(stats.sends == sends_count_expectation); } + assert(prog_data.base.total_scratch == 0); + struct anv_pipeline_bind_map empty_bind_map = {}; struct anv_push_descriptor_info empty_push_desc_info = {}; struct anv_shader_upload_params upload_params = { @@ -336,6 +255,11 @@ anv_device_init_internal_kernels(struct anv_device *device) false /* needs_slm */); device->internal_kernels_l3_config = intel_get_l3_config(device->info, w); + void *mem_ctx = ralloc_context(NULL); + + nir_shader *libanv_shaders = + anv_genX(device->info, load_libanv_shader)(device, mem_ctx); + const struct { struct { char name[40]; @@ -343,134 +267,54 @@ anv_device_init_internal_kernels(struct anv_device *device) gl_shader_stage stage; - const uint32_t *spirv_data; - uint32_t spirv_size; - uint32_t send_count; - - struct anv_internal_kernel_bind_map bind_map; } internal_kernels[] = { [ANV_INTERNAL_KERNEL_GENERATED_DRAWS] = { .key = { .name = "anv-generated-indirect-draws", }, .stage = MESA_SHADER_FRAGMENT, - .spirv_data = generated_draws_spv_source, - .spirv_size = ARRAY_SIZE(generated_draws_spv_source), - .send_count = /* 2 * (2 loads + 3 stores) + ** gfx11 ** - * 2 * (2 loads + 6 stores) + ** gfx9 ** - * 1 load + 3 store - */ 29, - .bind_map = { - .num_bindings = 5, - .bindings = { - { - .address_offset = offsetof(struct anv_generated_indirect_params, - indirect_data_addr), - }, - { - .address_offset = offsetof(struct anv_generated_indirect_params, - generated_cmds_addr), - }, - { - .address_offset = offsetof(struct anv_generated_indirect_params, - draw_ids_addr), - }, - { - .address_offset = offsetof(struct anv_generated_indirect_params, - draw_count_addr), - }, - { - .push_constant = true, - }, - }, - .push_data_size = sizeof(struct anv_generated_indirect_params), - }, + .send_count = (device->info->ver == 9 ? + /* 1 load + + * 4 stores + + * 2 * (2 loads + 2 stores) + + * 3 stores + */ + 16 : + /* 1 load + + * 2 * (2 loads + 3 stores) + + * 3 stores + */ + 14), }, [ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE] = { .key = { .name = "anv-copy-query-compute", }, .stage = MESA_SHADER_COMPUTE, - .spirv_data = query_copy_compute_spv_source, - .spirv_size = ARRAY_SIZE(query_copy_compute_spv_source), .send_count = device->info->verx10 >= 125 ? 9 /* 4 loads + 4 stores + 1 EOT */ : 8 /* 3 loads + 4 stores + 1 EOT */, - .bind_map = { - .num_bindings = 3, - .bindings = { - { - .address_offset = offsetof(struct anv_query_copy_params, - query_data_addr), - }, - { - .address_offset = offsetof(struct anv_query_copy_params, - destination_addr), - }, - { - .push_constant = true, - }, - }, - .push_data_size = sizeof(struct anv_query_copy_params), - }, }, [ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT] = { .key = { .name = "anv-copy-query-fragment", }, .stage = MESA_SHADER_FRAGMENT, - .spirv_data = query_copy_fragment_spv_source, - .spirv_size = ARRAY_SIZE(query_copy_fragment_spv_source), .send_count = 8 /* 3 loads + 4 stores + 1 EOT */, - .bind_map = { - .num_bindings = 3, - .bindings = { - { - .address_offset = offsetof(struct anv_query_copy_params, - query_data_addr), - }, - { - .address_offset = offsetof(struct anv_query_copy_params, - destination_addr), - }, - { - .push_constant = true, - }, - }, - .push_data_size = sizeof(struct anv_query_copy_params), - }, }, [ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE] = { .key = { .name = "anv-memcpy-compute", }, .stage = MESA_SHADER_COMPUTE, - .spirv_data = memcpy_compute_spv_source, - .spirv_size = ARRAY_SIZE(memcpy_compute_spv_source), .send_count = device->info->verx10 >= 125 ? 10 /* 5 loads (1 pull constants) + 4 stores + 1 EOT */ : 9 /* 4 loads + 4 stores + 1 EOT */, - .bind_map = { - .num_bindings = 3, - .bindings = { - { - .address_offset = offsetof(struct anv_memcpy_params, - src_addr), - }, - { - .address_offset = offsetof(struct anv_memcpy_params, - dst_addr), - }, - { - .push_constant = true, - }, - }, - .push_data_size = sizeof(struct anv_memcpy_params), - }, }, }; + VkResult result = VK_SUCCESS; for (uint32_t i = 0; i < ARRAY_SIZE(internal_kernels); i++) { device->internal_kernels[i] = anv_device_search_for_kernel(device, @@ -480,18 +324,19 @@ anv_device_init_internal_kernels(struct anv_device *device) NULL); if (device->internal_kernels[i] == NULL) { device->internal_kernels[i] = - compile_upload_spirv(device, - internal_kernels[i].stage, - internal_kernels[i].key.name, - &internal_kernels[i].key, - sizeof(internal_kernels[i].key), - &internal_kernels[i].bind_map, - internal_kernels[i].spirv_data, - internal_kernels[i].spirv_size, - internal_kernels[i].send_count); + compile_shader(device, + libanv_shaders, + i, + internal_kernels[i].stage, + internal_kernels[i].key.name, + &internal_kernels[i].key, + sizeof(internal_kernels[i].key), + internal_kernels[i].send_count); + } + if (device->internal_kernels[i] == NULL) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto error; } - if (device->internal_kernels[i] == NULL) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); /* The cache already has a reference and it's not going anywhere so * there is no need to hold a second reference. @@ -499,7 +344,8 @@ anv_device_init_internal_kernels(struct anv_device *device) anv_shader_bin_unref(device, device->internal_kernels[i]); } - return VK_SUCCESS; + error: + return result; } void diff --git a/src/intel/vulkan/anv_internal_kernels.h b/src/intel/vulkan/anv_internal_kernels.h index cfb0d700670..d0e325add2a 100644 --- a/src/intel/vulkan/anv_internal_kernels.h +++ b/src/intel/vulkan/anv_internal_kernels.h @@ -24,21 +24,51 @@ #ifndef ANV_GENERATED_INDIRECT_DRAWS_H #define ANV_GENERATED_INDIRECT_DRAWS_H -#include "shaders/interface.h" +#include "libintel_shaders.h" -struct PACKED anv_generated_indirect_params { - struct anv_generated_indirect_draw_params draw; +struct PACKED anv_gen_indirect_params { + /* Draw ID buffer address (only used on Gfx9) */ + uint64_t draw_id_addr; - /* Global address of binding 0 */ + /* Indirect data buffer address (only used on Gfx9) */ uint64_t indirect_data_addr; - /* Global address of binding 1 */ + /* Stride between each elements of the indirect data buffer */ + uint32_t indirect_data_stride; + + uint32_t flags; /* 0-7: bits, 8-15: mocs, 16-23: cmd_dws */ + + /* Base number of the draw ID, it is added to the index computed from the + * gl_FragCoord + */ + uint32_t draw_base; + + /* Maximum number of draws (equals to draw_count for indirect draws without + * an indirect count) + */ + uint32_t max_draw_count; + + /* Number of draws to generate in the ring buffer (only useful in ring + * buffer mode) + */ + uint32_t ring_count; + + /* Instance multiplier for multi view */ + uint32_t instance_multiplier; + + /* Address where to jump at to generate further draws (used with ring mode) + */ + uint64_t gen_addr; + + /* Address where to jump at after the generated draw (only used with + * indirect draw count variants) + */ + uint64_t end_addr; + + /* Destination of the generated draw commands */ uint64_t generated_cmds_addr; - /* Global address of binding 2 */ - uint64_t draw_ids_addr; - - /* Global address of binding 3 (points to the draw_count field above) */ + /* Draw count address (points to the draw_count field in cases) */ uint64_t draw_count_addr; /* Draw count value for non count variants of draw indirect commands */ @@ -48,31 +78,53 @@ struct PACKED anv_generated_indirect_params { * split into smaller chunks, see while loop in * genX(cmd_buffer_emit_indirect_generated_draws) */ - struct anv_generated_indirect_params *prev; + struct anv_gen_indirect_params *prev; }; struct PACKED anv_query_copy_params { - struct anv_query_copy_shader_params copy; + /* ANV_COPY_QUERY_FLAG_* flags */ + uint32_t flags; + /* Number of queries to copy */ + uint32_t num_queries; + + /* Number of items to write back in the results per query */ + uint32_t num_items; + + /* First query to copy result from */ + uint32_t query_base; + + /* Query stride in bytes */ + uint32_t query_stride; + + /* Offset at which the data should be read from */ + uint32_t query_data_offset; + + /* Stride of destination writes */ + uint32_t destination_stride; + + /* We need to be 64 bit aligned, or 32 bit builds get + * very unhappy. + */ + uint32_t padding; + + /* Address of the query pool */ uint64_t query_data_addr; + /* Destination address of the results */ uint64_t destination_addr; }; -/* This needs to match memcpy_compute.glsl : - * - * layout(set = 0, binding = 2) uniform block - */ -struct PACKED anv_memcpy_shader_params { - uint32_t num_dwords; - uint32_t pad; -}; - struct PACKED anv_memcpy_params { - struct anv_memcpy_shader_params copy; + /* Number of dwords to copy*/ + uint32_t num_dwords; + uint32_t pad; + + /* Source address of the copy */ uint64_t src_addr; + /* Destination address of the copy */ uint64_t dst_addr; }; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 95882c0de21..324dd8e7197 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1635,20 +1635,6 @@ enum anv_internal_kernel_name { ANV_INTERNAL_KERNEL_COUNT, }; -struct anv_internal_kernel_bind_map { - uint32_t num_bindings; - struct { - /* Whether this binding is provided through push constants */ - bool push_constant; - - /* When not provided by push constants, this is offset at which the - * 64bit address of the binding is located in the push constant data. - */ - uint32_t address_offset; - } bindings[5]; - uint32_t push_data_size; -}; - enum anv_rt_bvh_build_method { ANV_BVH_BUILD_METHOD_TRIVIAL, ANV_BVH_BUILD_METHOD_NEW_SAH, diff --git a/src/intel/vulkan/anv_utrace.c b/src/intel/vulkan/anv_utrace.c index 56d445958a8..1cb3e62c562 100644 --- a/src/intel/vulkan/anv_utrace.c +++ b/src/intel/vulkan/anv_utrace.c @@ -138,15 +138,13 @@ anv_device_utrace_emit_cs_copy_ts_buffer(struct u_trace_context *utctx, struct anv_memcpy_params *params = push_data_state.map; *params = (struct anv_memcpy_params) { - .copy = { - .num_dwords = count * sizeof(union anv_utrace_timestamp) / 4, - }, - .src_addr = anv_address_physical(from_addr), - .dst_addr = anv_address_physical(to_addr), + .num_dwords = count * sizeof(union anv_utrace_timestamp) / 4, + .src_addr = anv_address_physical(from_addr), + .dst_addr = anv_address_physical(to_addr), }; anv_genX(device->info, emit_simple_shader_dispatch)( - &submit->simple_state, DIV_ROUND_UP(params->copy.num_dwords, 4), + &submit->simple_state, DIV_ROUND_UP(params->num_dwords, 4), push_data_state); } diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h index 89fceb8fac5..2b48a72f1e4 100644 --- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h +++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h @@ -60,7 +60,7 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer, struct anv_state push_data_state = genX(simple_shader_alloc_push)(simple_state, - sizeof(struct anv_generated_indirect_params)); + sizeof(struct anv_gen_indirect_params)); if (push_data_state.map == NULL) return ANV_STATE_NULL; @@ -73,42 +73,37 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer, if (anv_address_is_null(count_addr)) { draw_count_addr = anv_address_add( genX(simple_shader_push_state_address)(simple_state, push_data_state), - offsetof(struct anv_generated_indirect_params, draw_count)); + offsetof(struct anv_gen_indirect_params, draw_count)); } else { draw_count_addr = count_addr; } - struct anv_generated_indirect_params *push_data = push_data_state.map; - *push_data = (struct anv_generated_indirect_params) { - .draw = { - .draw_id_addr = anv_address_physical(draw_id_addr), - .indirect_data_addr = anv_address_physical(indirect_data_addr), - .indirect_data_stride = indirect_data_stride, - .flags = (use_tbimr ? ANV_GENERATED_FLAG_TBIMR : 0) | - (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) | - (cmd_buffer->state.conditional_render_enabled ? - ANV_GENERATED_FLAG_PREDICATED : 0) | - ((vs_prog_data->uses_firstvertex || - vs_prog_data->uses_baseinstance) ? - ANV_GENERATED_FLAG_BASE : 0) | - (vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) | - (anv_mocs(device, indirect_data_addr.bo, - ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) | - (!anv_address_is_null(count_addr) ? - ANV_GENERATED_FLAG_COUNT : 0) | - (ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0) | - ((generated_cmd_stride / 4) << 16) | - device->info->ver << 24, - .draw_base = item_base, - .max_draw_count = max_count, - .ring_count = ring_count, - .instance_multiplier = pipeline->instance_multiplier, - }, - .draw_count = anv_address_is_null(count_addr) ? max_count : 0, - .indirect_data_addr = anv_address_physical(indirect_data_addr), - .generated_cmds_addr = anv_address_physical(generated_cmds_addr), - .draw_ids_addr = anv_address_physical(draw_id_addr), - .draw_count_addr = anv_address_physical(draw_count_addr), + struct anv_gen_indirect_params *push_data = push_data_state.map; + *push_data = (struct anv_gen_indirect_params) { + .draw_id_addr = anv_address_physical(draw_id_addr), + .indirect_data_addr = anv_address_physical(indirect_data_addr), + .indirect_data_stride = indirect_data_stride, + .flags = (use_tbimr ? ANV_GENERATED_FLAG_TBIMR : 0) | + (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) | + (cmd_buffer->state.conditional_render_enabled ? + ANV_GENERATED_FLAG_PREDICATED : 0) | + ((vs_prog_data->uses_firstvertex || + vs_prog_data->uses_baseinstance) ? + ANV_GENERATED_FLAG_BASE : 0) | + (vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) | + (anv_mocs(device, indirect_data_addr.bo, + ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) | + (!anv_address_is_null(count_addr) ? + ANV_GENERATED_FLAG_COUNT : 0) | + (ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0) | + ((generated_cmd_stride / 4) << 16), + .draw_base = item_base, + .max_draw_count = max_count, + .ring_count = ring_count, + .instance_multiplier = pipeline->instance_multiplier, + .draw_count = anv_address_is_null(count_addr) ? max_count : 0, + .generated_cmds_addr = anv_address_physical(generated_cmds_addr), + .draw_count_addr = anv_address_physical(draw_count_addr), }; genX(emit_simple_shader_dispatch)(simple_state, item_count, push_data_state); @@ -212,7 +207,7 @@ genX(cmd_buffer_get_generated_draw_stride)(struct anv_cmd_buffer *cmd_buffer) static void genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer, - struct anv_generated_indirect_params *params) + struct anv_gen_indirect_params *params) { /* We don't know the end_addr until we have emitted all the generation * draws. Go and edit the address of all the push parameters. @@ -220,7 +215,7 @@ genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer, uint64_t end_addr = anv_address_physical(anv_batch_current_address(&cmd_buffer->batch)); while (params != NULL) { - params->draw.end_addr = end_addr; + params->end_addr = end_addr; params = params->prev; } } @@ -295,7 +290,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inplace)(struct anv_cmd_buffer *cm const uint32_t draw_cmd_stride = genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer); - struct anv_generated_indirect_params *last_params = NULL; + struct anv_gen_indirect_params *last_params = NULL; uint32_t item_base = 0; while (item_base < max_draw_count) { const uint32_t item_count = MIN2(max_draw_count - item_base, @@ -329,7 +324,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inplace)(struct anv_cmd_buffer *cm max_draw_count, indexed, 0 /* ring_count */); - struct anv_generated_indirect_params *params = params_state.map; + struct anv_gen_indirect_params *params = params_state.map; if (params == NULL) return; @@ -498,7 +493,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd max_draw_count, indexed, ring_count); - struct anv_generated_indirect_params *params = params_state.map; + struct anv_gen_indirect_params *params = params_state.map; anv_add_pending_pipe_bits(cmd_buffer, #if GFX_VER == 9 @@ -561,7 +556,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd struct anv_address draw_base_addr = anv_address_add( genX(simple_shader_push_state_address)( &simple_state, params_state), - offsetof(struct anv_generated_indirect_params, draw.draw_base)); + offsetof(struct anv_gen_indirect_params, draw_base)); const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device, &draw_base_addr); @@ -594,8 +589,8 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT, "after generated draws end"); - params->draw.gen_addr = anv_address_physical(inc_addr); - params->draw.end_addr = anv_address_physical(end_addr); + params->gen_addr = anv_address_physical(inc_addr); + params->end_addr = anv_address_physical(end_addr); } } diff --git a/src/intel/vulkan/genX_internal_kernels.c b/src/intel/vulkan/genX_internal_kernels.c new file mode 100644 index 00000000000..85b69a04978 --- /dev/null +++ b/src/intel/vulkan/genX_internal_kernels.c @@ -0,0 +1,111 @@ +/* Copyright © 2023 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "anv_private.h" +#include "anv_internal_kernels.h" + +#include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_serialize.h" + +#if GFX_VERx10 == 90 +# include "intel_gfx9_shaders_code.h" +#elif GFX_VERx10 == 110 +# include "intel_gfx11_shaders_code.h" +#elif GFX_VERx10 == 120 +# include "intel_gfx12_shaders_code.h" +#elif GFX_VERx10 == 125 +# include "intel_gfx125_shaders_code.h" +#elif GFX_VERx10 == 200 +# include "intel_gfx20_shaders_code.h" +#else +# error "Unsupported generation" +#endif + +#include "genxml/gen_macros.h" + +#define load_param(b, bit_size, struct_name, field_name) \ + nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0), \ + .base = offsetof(struct_name, field_name), \ + .range = bit_size / 8) + +static nir_def * +load_fragment_index(nir_builder *b) +{ + nir_def *pos_in = nir_f2i32(b, nir_trim_vector(b, nir_load_frag_coord(b), 2)); + return nir_iadd(b, + nir_imul_imm(b, nir_channel(b, pos_in, 1), 8192), + nir_channel(b, pos_in, 0)); +} + +static nir_def * +load_compute_index(nir_builder *b) +{ + return nir_channel(b, nir_load_global_invocation_id_zero_base(b, 32), 0); +} + +nir_shader * +genX(load_libanv_shader)(struct anv_device *device, void *mem_ctx) +{ + const nir_shader_compiler_options *nir_options = + device->physical->compiler->nir_options[MESA_SHADER_KERNEL]; + + struct blob_reader blob; + blob_reader_init(&blob, (void *)genX(intel_shaders_nir), + sizeof(genX(intel_shaders_nir))); + return nir_deserialize(mem_ctx, nir_options, &blob); +} + +uint32_t +genX(call_internal_shader)(nir_builder *b, enum anv_internal_kernel_name shader_name) +{ + switch (shader_name) { + case ANV_INTERNAL_KERNEL_GENERATED_DRAWS: + genX(libanv_write_draw)( + b, + load_param(b, 64, struct anv_gen_indirect_params, generated_cmds_addr), + load_param(b, 64, struct anv_gen_indirect_params, indirect_data_addr), + load_param(b, 64, struct anv_gen_indirect_params, draw_id_addr), + load_param(b, 32, struct anv_gen_indirect_params, indirect_data_stride), + load_param(b, 64, struct anv_gen_indirect_params, draw_count_addr), + load_param(b, 32, struct anv_gen_indirect_params, draw_base), + load_param(b, 32, struct anv_gen_indirect_params, instance_multiplier), + load_param(b, 32, struct anv_gen_indirect_params, max_draw_count), + load_param(b, 32, struct anv_gen_indirect_params, flags), + load_param(b, 32, struct anv_gen_indirect_params, ring_count), + load_param(b, 64, struct anv_gen_indirect_params, gen_addr), + load_param(b, 64, struct anv_gen_indirect_params, end_addr), + load_fragment_index(b)); + return sizeof(struct anv_gen_indirect_params); + + case ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE: + case ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT: + genX(libanv_query_copy)( + b, + load_param(b, 64, struct anv_query_copy_params, destination_addr), + load_param(b, 32, struct anv_query_copy_params, destination_stride), + load_param(b, 64, struct anv_query_copy_params, query_data_addr), + load_param(b, 32, struct anv_query_copy_params, query_base), + load_param(b, 32, struct anv_query_copy_params, num_queries), + load_param(b, 32, struct anv_query_copy_params, query_data_offset), + load_param(b, 32, struct anv_query_copy_params, query_stride), + load_param(b, 32, struct anv_query_copy_params, num_items), + load_param(b, 32, struct anv_query_copy_params, flags), + shader_name == ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE ? + load_compute_index(b) : load_fragment_index(b)); + return sizeof(struct anv_query_copy_params); + + case ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE: + genX(libanv_memcpy)( + b, + load_param(b, 64, struct anv_memcpy_params, dst_addr), + load_param(b, 64, struct anv_memcpy_params, src_addr), + load_param(b, 32, struct anv_memcpy_params, num_dwords), + nir_imul_imm(b, load_compute_index(b), 4)); + return sizeof(struct anv_memcpy_params); + + default: + unreachable("Invalid shader name"); + break; + } +} diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 92ed95fe697..1e11ed90f71 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -1799,20 +1799,18 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer, } *params = (struct anv_query_copy_params) { - .copy = { - .flags = copy_flags, - .num_queries = query_count, - .num_items = num_items, - .query_base = first_query, - .query_stride = pool->stride, - .query_data_offset = data_offset, - .destination_stride = dest_stride, - }, - .query_data_addr = anv_address_physical( + .flags = copy_flags, + .num_queries = query_count, + .num_items = num_items, + .query_base = first_query, + .query_stride = pool->stride, + .query_data_offset = data_offset, + .destination_stride = dest_stride, + .query_data_addr = anv_address_physical( (struct anv_address) { .bo = pool->bo, }), - .destination_addr = anv_address_physical(dest_addr), + .destination_addr = anv_address_physical(dest_addr), }; genX(emit_simple_shader_dispatch)(&state, query_count, push_data_state); diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index ec9d9360f1e..79f13dd28b3 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -18,8 +18,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -subdir('shaders') - inc_anv = include_directories('.') anv_flags = [ @@ -110,6 +108,7 @@ anv_per_hw_ver_files = files( 'genX_gfx_state.c', 'genX_gpu_memcpy.c', 'genX_init_state.c', + 'genX_internal_kernels.c', 'genX_pipeline.c', 'genX_query.c', 'genX_simple_shader.c', @@ -121,7 +120,7 @@ endif foreach _gfx_ver : ['90', '110', '120', '125', '200'] libanv_per_hw_ver_libs += static_library( 'anv_per_hw_ver@0@'.format(_gfx_ver), - [anv_per_hw_ver_files, anv_entrypoints[0], anv_internal_spvs, ], + [anv_per_hw_ver_files, anv_entrypoints[0]], include_directories : [ inc_include, inc_src, inc_intel, ], @@ -131,7 +130,7 @@ foreach _gfx_ver : ['90', '110', '120', '125', '200'] dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml, idep_vulkan_util_headers, idep_vulkan_wsi_headers, idep_vulkan_runtime_headers, idep_intel_driver_ds_headers, - idep_grl, + idep_grl, idep_intel_shaders, ], ) endforeach @@ -207,6 +206,7 @@ anv_deps = [ idep_vulkan_util_headers, idep_vulkan_runtime_headers, idep_vulkan_wsi_headers, + idep_intel_shaders, ] if with_platform_x11 @@ -234,8 +234,7 @@ libanv_common = static_library( 'anv_common', [ libanv_files, anv_entrypoints, sha1_h, - gen_xml_pack, float64_spv_h, - anv_internal_spvs, + gen_xml_pack, intel_float64_spv_h, ], include_directories : [ inc_include, inc_src, inc_intel, @@ -298,7 +297,7 @@ if with_tests dependencies : [ dep_thread, dep_dl, dep_m, anv_deps, idep_nir, idep_vulkan_util, idep_vulkan_wsi, idep_vulkan_runtime, - idep_mesautil, idep_intel_dev, + idep_mesautil, idep_intel_dev, idep_intel_shaders, ], c_args : anv_flags, gnu_symbol_visibility : 'hidden', @@ -328,6 +327,7 @@ if with_tests idep_gtest, dep_libdrm, dep_thread, dep_m, dep_valgrind, idep_vulkan_util, idep_vulkan_wsi_headers, idep_vulkan_runtime, idep_intel_driver_ds, idep_intel_dev, + idep_intel_shaders, ], include_directories : [ inc_include, inc_src, inc_intel, diff --git a/src/intel/vulkan/shaders/common_generated_draws.glsl b/src/intel/vulkan/shaders/common_generated_draws.glsl deleted file mode 100644 index 9316a065c34..00000000000 --- a/src/intel/vulkan/shaders/common_generated_draws.glsl +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright © 2022 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "interface.h" - -/* All storage bindings will be accessed through A64 messages */ -layout(set = 0, binding = 0, std430) buffer Storage0 { - uint indirect_data[]; -}; - -layout(set = 0, binding = 1, std430) buffer Storage1 { - uint commands[]; -}; - -layout(set = 0, binding = 2, std430) buffer Storage2 { - uint draw_ids[]; -}; - -/* We're not using a uniform block for this because our compiler - * infrastructure relies on UBOs to be 32-bytes aligned so that we can push - * them into registers. This value can come directly from the indirect buffer - * given to indirect draw commands and the requirement there is 4-bytes - * alignment. - * - * Also use a prefix to the variable to remember to make a copy of it, avoid - * unnecessary accesses. - */ -layout(set = 0, binding = 3) buffer Storage3 { - uint _draw_count; -}; - -/* This data will be provided through push constants. */ -layout(set = 0, binding = 4) uniform block { - anv_generated_indirect_draw_params params; -}; - -void write_VERTEX_BUFFER_STATE(uint write_offset, - uint mocs, - uint buffer_idx, - uint64_t address, - uint size) -{ - commands[write_offset + 0] = (0 << 0 | /* Buffer Pitch */ - 0 << 13 | /* Null Vertex Buffer */ - 1 << 14 | /* Address Modify Enable */ - mocs << 16 | /* MOCS */ - buffer_idx << 26); /* Vertex Buffer Index */ - commands[write_offset + 1] = uint(address & 0xffffffff); - commands[write_offset + 2] = uint(address >> 32); - commands[write_offset + 3] = size; -} - -void write_3DPRIMITIVE(uint write_offset, - bool is_predicated, - bool is_indexed, - uint vertex_count_per_instance, - uint start_vertex_location, - uint instance_count, - uint start_instance_location, - uint base_vertex_location) -{ - commands[write_offset + 0] = (3 << 29 | /* Command Type */ - 3 << 27 | /* Command SubType */ - 3 << 24 | /* 3D Command Opcode */ - uint(is_predicated) << 8 | - 5 << 0); /* DWord Length */ - commands[write_offset + 1] = uint(is_indexed) << 8; - commands[write_offset + 2] = vertex_count_per_instance; - commands[write_offset + 3] = start_vertex_location; - commands[write_offset + 4] = instance_count; - commands[write_offset + 5] = start_instance_location; - commands[write_offset + 6] = base_vertex_location; -} - -void write_3DPRIMITIVE_EXTENDED(uint write_offset, - bool is_predicated, - bool is_indexed, - bool use_tbimr, - uint vertex_count_per_instance, - uint start_vertex_location, - uint instance_count, - uint start_instance_location, - uint base_vertex_location, - uint param_base_vertex, - uint param_base_instance, - uint param_draw_id) -{ - commands[write_offset + 0] = (3 << 29 | /* Command Type */ - 3 << 27 | /* Command SubType */ - 3 << 24 | /* 3D Command Opcode */ - uint(use_tbimr) << 13 | - 1 << 11 | /* Extended Parameter Enable */ - uint(is_predicated) << 8 | - 8 << 0); /* DWord Length */ - commands[write_offset + 1] = uint(is_indexed) << 8; - commands[write_offset + 2] = vertex_count_per_instance; - commands[write_offset + 3] = start_vertex_location; - commands[write_offset + 4] = instance_count; - commands[write_offset + 5] = start_instance_location; - commands[write_offset + 6] = base_vertex_location; - commands[write_offset + 7] = param_base_vertex; - commands[write_offset + 8] = param_base_instance; - commands[write_offset + 9] = param_draw_id; -} - -void write_MI_BATCH_BUFFER_START(uint write_offset, - uint64_t addr) -{ - commands[write_offset + 0] = (0 << 29 | /* Command Type */ - 49 << 23 | /* MI Command Opcode */ - 1 << 8 | /* Address Space Indicator (PPGTT) */ - 1 << 0); /* DWord Length */ - commands[write_offset + 1] = uint(addr & 0xffffffff); - commands[write_offset + 2] = uint(addr >> 32); -} - -void end_generated_draws(uint item_idx, uint cmd_idx, uint draw_id, uint draw_count) -{ - uint _3dprim_dw_size = (params.flags >> 16) & 0xff; - bool indirect_count = (params.flags & ANV_GENERATED_FLAG_COUNT) != 0; - bool ring_mode = (params.flags & ANV_GENERATED_FLAG_RING_MODE) != 0; - /* We can have an indirect draw count = 0. */ - uint last_draw_id = draw_count == 0 ? 0 : (min(draw_count, params.max_draw_count) - 1); - uint jump_offset = draw_count == 0 ? 0 : _3dprim_dw_size; - - if (ring_mode) { - if (draw_id == last_draw_id) { - /* Exit the ring buffer to the next user commands */ - write_MI_BATCH_BUFFER_START(cmd_idx + jump_offset, params.end_addr); - } else if (item_idx == (params.ring_count - 1)) { - /* Jump back to the generation shader to generate mode draws */ - write_MI_BATCH_BUFFER_START(cmd_idx + jump_offset, params.gen_addr); - } - } else { - if (draw_id == last_draw_id && draw_count < params.max_draw_count) { - /* Skip forward to the end of the generated draws */ - write_MI_BATCH_BUFFER_START(cmd_idx + jump_offset, params.end_addr); - } - } -} diff --git a/src/intel/vulkan/shaders/common_query_copy.glsl b/src/intel/vulkan/shaders/common_query_copy.glsl deleted file mode 100644 index 9e480ba0be8..00000000000 --- a/src/intel/vulkan/shaders/common_query_copy.glsl +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright © 2023 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "interface.h" - -/* These 3 bindings will be accessed through A64 messages */ -layout(set = 0, binding = 0, std430) buffer Storage0 { - uint query_data[]; -}; - -layout(set = 0, binding = 1, std430) buffer Storage1 { - uint destination[]; -}; - -/* This data will be provided through push constants. */ -layout(set = 0, binding = 2) uniform block { - anv_query_copy_shader_params params; -}; - -void query_copy(uint item_idx) -{ - if (item_idx >= params.num_queries) - return; - - bool is_result64 = (params.flags & ANV_COPY_QUERY_FLAG_RESULT64) != 0; - bool write_available = (params.flags & ANV_COPY_QUERY_FLAG_AVAILABLE) != 0; - bool compute_delta = (params.flags & ANV_COPY_QUERY_FLAG_DELTA) != 0; - bool partial_result = (params.flags & ANV_COPY_QUERY_FLAG_PARTIAL) != 0; - - uint query_byte = (params.query_base + item_idx) * params.query_stride; - uint query_data_byte = query_byte + params.query_data_offset; - uint destination_byte = item_idx * params.destination_stride; - - uint64_t availability = query_data[query_byte / 4]; - - uint query_data_dword = query_data_byte / 4; - uint dest_dword = destination_byte / 4; - for (uint i = 0; i < params.num_items; i++) { - uint item_data_dword = query_data_dword + i * 2 * (compute_delta ? 2 : 1); - - uint64_t v; - if (compute_delta) { - uint64_t v0 = uint64_t(query_data[item_data_dword + 0]) | - (uint64_t(query_data[item_data_dword + 1]) << 32); - uint64_t v1 = uint64_t(query_data[item_data_dword + 2]) | - (uint64_t(query_data[item_data_dword + 3]) << 32); - - v = v1 - v0; - } else { - - v = uint64_t(query_data[item_data_dword + 0]) | - (uint64_t(query_data[item_data_dword + 1]) << 32); - } - - /* vkCmdCopyQueryPoolResults: - * - * "If VK_QUERY_RESULT_PARTIAL_BIT is set, then for any query that is - * unavailable, an intermediate result between zero and the final - * result value is written for that query." - * - * We write 0 as the values not being written yet, we can't really make - * provide any sensible value. - */ - if (partial_result && availability == 0) - v = 0; - - if (is_result64) { - destination[dest_dword + 0] = uint(v & 0xffffffff); - destination[dest_dword + 1] = uint(v >> 32); - dest_dword += 2; - } else { - destination[dest_dword + 0] = uint(v & 0xffffffff); - dest_dword += 1; - } - } - - if (write_available) { - if (is_result64) { - destination[dest_dword + 0] = uint(availability & 0xffffffff); - destination[dest_dword + 1] = uint(availability >> 32); - } else { - destination[dest_dword + 0] = uint(availability & 0xffffffff); - } - } -} diff --git a/src/intel/vulkan/shaders/generated_draws.glsl b/src/intel/vulkan/shaders/generated_draws.glsl deleted file mode 100644 index cb4593df84b..00000000000 --- a/src/intel/vulkan/shaders/generated_draws.glsl +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright © 2022 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#version 450 -#extension GL_ARB_gpu_shader_int64 : enable -#extension GL_GOOGLE_include_directive : enable - -#include "common_generated_draws.glsl" - -void gfx11_write_draw(uint item_idx, uint cmd_idx, uint draw_id) -{ - bool is_indexed = (params.flags & ANV_GENERATED_FLAG_INDEXED) != 0; - bool is_predicated = (params.flags & ANV_GENERATED_FLAG_PREDICATED) != 0; - bool use_tbimr = (params.flags & ANV_GENERATED_FLAG_TBIMR) != 0; - uint indirect_data_offset = draw_id * params.indirect_data_stride / 4; - - if (is_indexed) { - /* Loading a VkDrawIndexedIndirectCommand */ - uint index_count = indirect_data[indirect_data_offset + 0]; - uint instance_count = indirect_data[indirect_data_offset + 1] * params.instance_multiplier; - uint first_index = indirect_data[indirect_data_offset + 2]; - uint vertex_offset = indirect_data[indirect_data_offset + 3]; - uint first_instance = indirect_data[indirect_data_offset + 4]; - - write_3DPRIMITIVE_EXTENDED(cmd_idx, - is_predicated, - is_indexed, - use_tbimr, - index_count, - first_index, - instance_count, - first_instance, - vertex_offset, - vertex_offset, - first_instance, - draw_id); - } else { - /* Loading a VkDrawIndirectCommand structure */ - uint vertex_count = indirect_data[indirect_data_offset + 0]; - uint instance_count = indirect_data[indirect_data_offset + 1] * params.instance_multiplier; - uint first_vertex = indirect_data[indirect_data_offset + 2]; - uint first_instance = indirect_data[indirect_data_offset + 3]; - - write_3DPRIMITIVE_EXTENDED(cmd_idx, - is_predicated, - is_indexed, - use_tbimr, - vertex_count, - first_vertex, - instance_count, - first_instance, - 0 /* base_vertex_location */, - first_vertex, - first_instance, - draw_id); - } -} - -void gfx9_write_draw(uint item_idx, uint cmd_idx, uint draw_id) -{ - bool is_indexed = (params.flags & ANV_GENERATED_FLAG_INDEXED) != 0; - bool is_predicated = (params.flags & ANV_GENERATED_FLAG_PREDICATED) != 0; - bool uses_base = (params.flags & ANV_GENERATED_FLAG_BASE) != 0; - bool uses_drawid = (params.flags & ANV_GENERATED_FLAG_DRAWID) != 0; - uint mocs = (params.flags >> 8) & 0xff; - uint indirect_data_offset = draw_id * params.indirect_data_stride / 4; - - if (is_indexed) { - /* Loading a VkDrawIndexedIndirectCommand */ - uint index_count = indirect_data[indirect_data_offset + 0]; - uint instance_count = indirect_data[indirect_data_offset + 1] * params.instance_multiplier; - uint first_index = indirect_data[indirect_data_offset + 2]; - uint vertex_offset = indirect_data[indirect_data_offset + 3]; - uint first_instance = indirect_data[indirect_data_offset + 4]; - - if (uses_base || uses_drawid) { - uint state_vertex_len = - 1 + (uses_base ? 4 : 0) + (uses_drawid ? 4 : 0); - commands[cmd_idx] = - (3 << 29 | /* Command Type */ - 3 << 27 | /* Command SubType */ - 0 << 24 | /* 3D Command Opcode */ - 8 << 16 | /* 3D Command Sub Opcode */ - (state_vertex_len - 2) << 0); /* DWord Length */ - cmd_idx += 1; - if (uses_base) { - uint64_t indirect_draw_data_addr = - params.indirect_data_addr + item_idx * params.indirect_data_stride + 12; - write_VERTEX_BUFFER_STATE(cmd_idx, - mocs, - 31, - indirect_draw_data_addr, - 8); - cmd_idx += 4; - } - if (uses_drawid) { - uint64_t draw_idx_addr = params.draw_id_addr + 4 * item_idx; - draw_ids[item_idx] = draw_id; - write_VERTEX_BUFFER_STATE(cmd_idx, - mocs, - 32, - draw_idx_addr, - 4); - cmd_idx += 4; - } - } - write_3DPRIMITIVE(cmd_idx, - is_predicated, - is_indexed, - index_count, - first_index, - instance_count, - first_instance, - vertex_offset); - } else { - /* Loading a VkDrawIndirectCommand structure */ - uint vertex_count = indirect_data[indirect_data_offset + 0]; - uint instance_count = indirect_data[indirect_data_offset + 1] * params.instance_multiplier; - uint first_vertex = indirect_data[indirect_data_offset + 2]; - uint first_instance = indirect_data[indirect_data_offset + 3]; - - if (uses_base || uses_drawid) { - uint state_vertex_len = - 1 + (uses_base ? 4 : 0) + (uses_drawid ? 4 : 0); - commands[cmd_idx] = - (3 << 29 | /* Command Type */ - 3 << 27 | /* Command SubType */ - 0 << 24 | /* 3D Command Opcode */ - 8 << 16 | /* 3D Command Sub Opcode */ - (state_vertex_len - 2) << 0); /* DWord Length */ - cmd_idx += 1; - if (uses_base) { - uint64_t indirect_draw_data_addr = - params.indirect_data_addr + item_idx * params.indirect_data_stride + 8; - write_VERTEX_BUFFER_STATE(cmd_idx, - mocs, - 31, - indirect_draw_data_addr, - 8); - cmd_idx += 4; - } - if (uses_drawid) { - uint64_t draw_idx_addr = params.draw_id_addr + 4 * item_idx; - draw_ids[item_idx] = draw_id; - write_VERTEX_BUFFER_STATE(cmd_idx, - mocs, - 32, - draw_idx_addr, - 4); - cmd_idx += 4; - } - } - write_3DPRIMITIVE(cmd_idx, - is_predicated, - is_indexed, - vertex_count, - first_vertex, - instance_count, - first_instance, - 0 /* base_vertex_location */); - } -} - -void main() -{ - uint _3dprim_dw_size = (params.flags >> 16) & 0xff; - uint gfx_ver = (params.flags >> 24) & 0xff; - uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x); - uint cmd_idx = item_idx * _3dprim_dw_size; - uint draw_id = params.draw_base + item_idx; - uint draw_count = _draw_count; - - if (draw_id < min(draw_count, params.max_draw_count)) { - if (gfx_ver == 9) - gfx9_write_draw(item_idx, cmd_idx, draw_id); - else - gfx11_write_draw(item_idx, cmd_idx, draw_id); - } - - end_generated_draws(item_idx, cmd_idx, draw_id, draw_count); -} diff --git a/src/intel/vulkan/shaders/interface.h b/src/intel/vulkan/shaders/interface.h deleted file mode 100644 index 0d6a82ca31b..00000000000 --- a/src/intel/vulkan/shaders/interface.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright © 2023 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef _SHADERS_INTERFACE_H_ -#define _SHADERS_INTERFACE_H_ - -#ifdef _IN_SHADER_ -#define BITFIELD_BIT(i) (1u << i) - -#define uint32_t uint -#define PACKED -#else -#include "util/macros.h" - -#include -#endif - -#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0) -#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1) -/* Only used on Gfx9, means the pipeline is using gl_DrawID */ -#define ANV_GENERATED_FLAG_DRAWID BITFIELD_BIT(2) -/* Only used on Gfx9, means the pipeline is using gl_BaseVertex or - * gl_BaseInstance - */ -#define ANV_GENERATED_FLAG_BASE BITFIELD_BIT(3) -/* Whether the count is indirect */ -#define ANV_GENERATED_FLAG_COUNT BITFIELD_BIT(4) -/* Whether the generation shader writes to the ring buffer */ -#define ANV_GENERATED_FLAG_RING_MODE BITFIELD_BIT(5) -/* Whether TBIMR tile-based rendering shall be enabled. */ -#define ANV_GENERATED_FLAG_TBIMR BITFIELD_BIT(6) - -struct PACKED anv_generated_indirect_draw_params { - /* Draw ID buffer address (only used on Gfx9) */ - uint64_t draw_id_addr; - /* Indirect data buffer address (only used on Gfx9) */ - uint64_t indirect_data_addr; - /* Stride between each elements of the indirect data buffer */ - uint32_t indirect_data_stride; - uint32_t flags; /* 0-7: bits, 8-15: mocs, 16-23: cmd_dws, 24-31: gfx_ver */ - /* Base number of the draw ID, it is added to the index computed from the - * gl_FragCoord - */ - uint32_t draw_base; - /* Maximum number of draws (equals to draw_count for indirect draws without - * an indirect count) - */ - uint32_t max_draw_count; - /* Number of draws to generate in the ring buffer (only useful in ring - * buffer mode) - */ - uint32_t ring_count; - /* Instance multiplier for multi view */ - uint32_t instance_multiplier; - /* Address where to jump at to generate further draws (used with ring mode) - */ - uint64_t gen_addr; - /* Address where to jump at after the generated draw (only used with - * indirect draw count variants) - */ - uint64_t end_addr; -}; - -#define ANV_COPY_QUERY_FLAG_RESULT64 BITFIELD_BIT(0) -#define ANV_COPY_QUERY_FLAG_AVAILABLE BITFIELD_BIT(1) -#define ANV_COPY_QUERY_FLAG_DELTA BITFIELD_BIT(2) -#define ANV_COPY_QUERY_FLAG_PARTIAL BITFIELD_BIT(3) - -/* This needs to match common_query_copy.glsl : - * - * layout(set = 0, binding = 2) uniform block - */ -struct PACKED anv_query_copy_shader_params { - /* ANV_COPY_QUERY_FLAG_* flags */ - uint32_t flags; - - /* Number of queries to copy */ - uint32_t num_queries; - - /* Number of items to write back in the results per query */ - uint32_t num_items; - - /* First query to copy result from */ - uint32_t query_base; - - /* Query stride in bytes */ - uint32_t query_stride; - - /* Offset at which the data should be read from */ - uint32_t query_data_offset; - - /* Stride of destination writes */ - uint32_t destination_stride; - - /* We need to be 64 bit aligned, or 32 bit builds get - * very unhappy. - */ - uint32_t padding; -}; - -#endif /* _SHADERS_INTERFACE_H_ */ diff --git a/src/intel/vulkan/shaders/memcpy_compute.glsl b/src/intel/vulkan/shaders/memcpy_compute.glsl deleted file mode 100644 index 61c399f8e9c..00000000000 --- a/src/intel/vulkan/shaders/memcpy_compute.glsl +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright © 2023 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#version 450 -#extension GL_ARB_gpu_shader_int64 : enable -#extension GL_GOOGLE_include_directive : enable - -layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in; - -/* These 2 bindings will be accessed through A64 messages */ -layout(set = 0, binding = 0, std430) buffer Storage0 { - uint src[]; -}; - -layout(set = 0, binding = 1, std430) buffer Storage1 { - uint dst[]; -}; - -/* This data will be provided through push constants. */ -layout(set = 0, binding = 2) uniform block { - uint num_dwords; - uint pad; -}; - -void main() -{ - uint idx = gl_GlobalInvocationID.x * 4; - /* Try to do copies in single message as much as possible. */ - if (idx + 4 <= num_dwords) { - dst[idx + 0] = src[idx + 0]; - dst[idx + 1] = src[idx + 1]; - dst[idx + 2] = src[idx + 2]; - dst[idx + 3] = src[idx + 3]; - } else if (idx + 3 <= num_dwords) { - dst[idx + 0] = src[idx + 0]; - dst[idx + 1] = src[idx + 1]; - dst[idx + 2] = src[idx + 2]; - } else if (idx + 2 <= num_dwords) { - dst[idx + 0] = src[idx + 0]; - dst[idx + 1] = src[idx + 1]; - } else if (idx + 1 <= num_dwords) { - dst[idx + 0] = src[idx + 0]; - } -} diff --git a/src/intel/vulkan/shaders/meson.build b/src/intel/vulkan/shaders/meson.build deleted file mode 100644 index 12f99c510ac..00000000000 --- a/src/intel/vulkan/shaders/meson.build +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright © 2022 Intel Corporation - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -float64_spv_h = custom_target( - 'float64_spv.h', - input : [glsl2spirv, float64_glsl_file], - output : 'float64_spv.h', - command : [ - prog_python, '@INPUT@', '@OUTPUT@', - prog_glslang, - '--create-entry', 'main', - '--vn', 'float64_spv_source', - '--glsl-version', '450', - '-Olib', - ] -) - -anv_internal_shaders = [ - [ 'generated_draws.glsl', 'frag' ], - [ 'query_copy_compute.glsl', 'comp' ], - [ 'query_copy_fragment.glsl', 'frag' ], - [ 'memcpy_compute.glsl', 'comp' ], -] - -anv_internal_spvs = [] -foreach item : anv_internal_shaders - f = item[0] - stage = item[1] - spv_filename = f.replace('.glsl', '_spv.h') - src_name = f.replace('.glsl', '_spv_source') - anv_internal_spvs += custom_target( - spv_filename, - input : [glsl2spirv, f, files('common_generated_draws.glsl', - 'common_query_copy.glsl', - 'interface.h')], - output : spv_filename, - command : [ - prog_python, '@INPUT0@', '@INPUT1@', '@OUTPUT@', - prog_glslang, - '--vn', src_name, - '--glsl-version', '450', - '--stage', stage, - '-I' + meson.current_source_dir(), - '-D_IN_SHADER_=1', - ]) -endforeach diff --git a/src/intel/vulkan/shaders/query_copy_compute.glsl b/src/intel/vulkan/shaders/query_copy_compute.glsl deleted file mode 100644 index 0e71af66529..00000000000 --- a/src/intel/vulkan/shaders/query_copy_compute.glsl +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright © 2023 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#version 450 -#extension GL_ARB_gpu_shader_int64 : enable -#extension GL_GOOGLE_include_directive : enable - -#include "common_query_copy.glsl" - -layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in; - -void main() -{ - query_copy(gl_GlobalInvocationID.x); -} diff --git a/src/intel/vulkan/shaders/query_copy_fragment.glsl b/src/intel/vulkan/shaders/query_copy_fragment.glsl deleted file mode 100644 index c75025bd544..00000000000 --- a/src/intel/vulkan/shaders/query_copy_fragment.glsl +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright © 2023 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#version 450 -#extension GL_ARB_gpu_shader_int64 : enable -#extension GL_GOOGLE_include_directive : enable - -#include "common_query_copy.glsl" - -void main() -{ - query_copy(uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x)); -}