anv: rewrite internal shaders using OpenCL

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26797>
Lionel Landwerlin
2023-11-06 13:12:25 +02:00
committed by Marge Bot
parent da391650f5
commit b52e25d3a8
26 changed files with 911 additions and 1133 deletions

View File

@@ -298,7 +298,8 @@ with_any_broadcom = [
if ['x86_64'].contains(host_machine.cpu_family()) and \
get_option('intel-clc') != 'system'
# Require intel-clc with Anv & Iris (for internal shaders)
with_intel_clc = get_option('intel-clc') == 'enabled'
with_intel_clc = get_option('intel-clc') == 'enabled' or \
with_intel_vk
else
with_intel_clc = false
endif

View File

@@ -119,16 +119,12 @@ endforeach
genX_cl_included_symbols = [
# instructions
'MI_BATCH_BUFFER_START',
'3DSTATE_CONSTANT_ALL',
'3DSTATE_CONSTANT_VS',
'3DSTATE_RASTER',
'3DSTATE_INDEX_BUFFER',
'3DSTATE_VERTEX_BUFFERS',
'3DPRIMITIVE',
'3DPRIMITIVE_EXTENDED',
# structures
'3DSTATE_CONSTANT_BODY',
'VERTEX_BUFFER_STATE',
]

View File

@@ -28,6 +28,7 @@ endif
subdir('isl')
subdir('common')
subdir('compiler')
subdir('shaders')
if with_intel_hasvk or with_intel_vk or with_gallium_crocus or with_gallium_iris or with_intel_tools
subdir('perf')
endif

View File

@@ -0,0 +1,206 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "libintel_shaders.h"
void genX(write_3DSTATE_VERTEX_BUFFERS)(global void *dst_ptr,
uint32_t buffer_count)
{
struct GENX(3DSTATE_VERTEX_BUFFERS) v = {
GENX(3DSTATE_VERTEX_BUFFERS_header),
};
v.DWordLength = 1 + (buffer_count * 4) -
GENX(3DSTATE_VERTEX_BUFFERS_length_bias);
GENX(3DSTATE_VERTEX_BUFFERS_pack)(dst_ptr, &v);
}
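As a worked example of the DWordLength computation above (a sketch, assuming the usual 3D-packet length bias of 2):
/* buffer_count == 2:
 *   DWordLength = 1 + (2 * 4) - 2 = 7
 * which encodes a 9-dword packet: 1 header dword followed by
 * 2 * 4 dwords of VERTEX_BUFFER_STATE, written separately below.
 */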
void genX(write_VERTEX_BUFFER_STATE)(global void *dst_ptr,
uint32_t mocs,
uint32_t buffer_idx,
uint64_t address,
uint32_t size,
uint32_t stride)
{
bool buffer_null = address == 0;
struct GENX(VERTEX_BUFFER_STATE) v = {
.BufferPitch = stride,
.NullVertexBuffer = buffer_null,
.AddressModifyEnable = true,
.MOCS = mocs,
#if GFX_VER >= 12
.L3BypassDisable = true,
#endif
.VertexBufferIndex = buffer_idx,
.BufferStartingAddress = address,
.BufferSize = size,
};
GENX(VERTEX_BUFFER_STATE_pack)(dst_ptr, &v);
}
#if GFX_VER == 9
void genX(write_3DPRIMITIVE)(global void *dst_ptr,
bool is_predicated,
bool is_indexed,
bool uses_tbimr,
uint32_t vertex_count_per_instance,
uint32_t start_vertex_location,
uint32_t instance_count,
uint32_t start_instance_location,
uint32_t base_vertex_location)
{
struct GENX(3DPRIMITIVE) v = {
GENX(3DPRIMITIVE_header),
#if GFX_VERx10 >= 125
.TBIMREnable = uses_tbimr,
#endif
.PredicateEnable = is_predicated,
.VertexAccessType = is_indexed ? RANDOM : SEQUENTIAL,
.VertexCountPerInstance = vertex_count_per_instance,
.StartVertexLocation = start_vertex_location,
.InstanceCount = instance_count,
.StartInstanceLocation = start_instance_location,
.BaseVertexLocation = base_vertex_location,
};
GENX(3DPRIMITIVE_pack)(dst_ptr, &v);
}
#endif
#if GFX_VER >= 11
void genX(write_3DPRIMITIVE_EXTENDED)(global void *dst_ptr,
bool is_predicated,
bool is_indexed,
bool uses_tbimr,
uint32_t vertex_count_per_instance,
uint32_t start_vertex_location,
uint32_t instance_count,
uint32_t start_instance_location,
uint32_t base_vertex_location,
uint32_t param_base_vertex,
uint32_t param_base_instance,
uint32_t param_draw_id)
{
struct GENX(3DPRIMITIVE_EXTENDED) v = {
GENX(3DPRIMITIVE_EXTENDED_header),
#if GFX_VERx10 >= 125
.TBIMREnable = uses_tbimr,
#endif
.PredicateEnable = is_predicated,
.VertexAccessType = is_indexed ? RANDOM : SEQUENTIAL,
.VertexCountPerInstance = vertex_count_per_instance,
.StartVertexLocation = start_vertex_location,
.InstanceCount = instance_count,
.StartInstanceLocation = start_instance_location,
.BaseVertexLocation = base_vertex_location,
.ExtendedParameter0 = param_base_vertex,
.ExtendedParameter1 = param_base_instance,
.ExtendedParameter2 = param_draw_id,
};
GENX(3DPRIMITIVE_EXTENDED_pack)(dst_ptr, &v);
}
#endif
void genX(write_MI_BATCH_BUFFER_START)(global void *dst_ptr, uint64_t addr)
{
struct GENX(MI_BATCH_BUFFER_START) v = {
GENX(MI_BATCH_BUFFER_START_header),
.AddressSpaceIndicator = ASI_PPGTT,
.BatchBufferStartAddress = addr,
};
GENX(MI_BATCH_BUFFER_START_pack)(dst_ptr, &v);
}
void genX(write_draw)(global uint32_t *dst_ptr,
global void *indirect_ptr,
global uint32_t *draw_id_ptr,
uint32_t draw_id,
uint32_t instance_multiplier,
bool is_indexed,
bool is_predicated,
bool uses_tbimr,
bool uses_base,
bool uses_drawid,
uint32_t mocs)
{
#if GFX_VER == 9
if (uses_base || uses_drawid) {
uint32_t vertex_buffer_count =
(uses_base ? 1 : 0) + (uses_drawid ? 1 : 0);
genX(write_3DSTATE_VERTEX_BUFFERS)(dst_ptr, vertex_buffer_count);
dst_ptr += 1; /* GENX(3DSTATE_VERTEX_BUFFERS_length); */
if (uses_base) {
uint64_t base_addr = (uint64_t)indirect_ptr + (is_indexed ? 12 : 8);
genX(write_VERTEX_BUFFER_STATE)(dst_ptr, mocs, 31, base_addr, 8, 0);
dst_ptr += GENX(VERTEX_BUFFER_STATE_length);
}
if (uses_drawid) {
*draw_id_ptr = draw_id;
genX(write_VERTEX_BUFFER_STATE)(dst_ptr, mocs, 32,
(uint64_t)draw_id_ptr, 4, 0);
dst_ptr += GENX(VERTEX_BUFFER_STATE_length);
}
}
if (is_indexed) {
VkDrawIndexedIndirectCommand data =
*((global VkDrawIndexedIndirectCommand *)indirect_ptr);
genX(write_3DPRIMITIVE)(dst_ptr,
is_predicated,
is_indexed,
uses_tbimr,
data.indexCount,
data.firstIndex,
data.instanceCount * instance_multiplier,
data.firstInstance,
data.vertexOffset);
} else {
VkDrawIndirectCommand data =
*((global VkDrawIndirectCommand *)indirect_ptr);
genX(write_3DPRIMITIVE)(dst_ptr,
is_predicated,
is_indexed,
uses_tbimr,
data.vertexCount,
data.firstVertex,
data.instanceCount * instance_multiplier,
data.firstInstance,
0 /* base_vertex_location */);
}
#else
if (is_indexed) {
VkDrawIndexedIndirectCommand data =
*((global VkDrawIndexedIndirectCommand *)indirect_ptr);
genX(write_3DPRIMITIVE_EXTENDED)(dst_ptr,
is_predicated,
is_indexed,
uses_tbimr,
data.indexCount,
data.firstIndex,
data.instanceCount * instance_multiplier,
data.firstInstance,
data.vertexOffset,
data.vertexOffset,
data.firstInstance,
draw_id);
} else {
VkDrawIndirectCommand data =
*((global VkDrawIndirectCommand *)indirect_ptr);
genX(write_3DPRIMITIVE_EXTENDED)(dst_ptr,
is_predicated,
is_indexed,
uses_tbimr,
data.vertexCount,
data.firstVertex,
data.instanceCount * instance_multiplier,
data.firstInstance,
0 /* base_vertex_location */,
data.firstVertex,
data.firstInstance,
draw_id);
}
#endif
}
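For reference, a sketch of the per-draw command slot each invocation of write_draw fills (dword counts derived from the packet writers above and the removed GLSL further down):
/* Gfx9, when gl_BaseVertex/gl_BaseInstance or gl_DrawID is used:
 *   3DSTATE_VERTEX_BUFFERS header             1 dword
 *   VERTEX_BUFFER_STATE (index 31, base)      4 dwords  (if uses_base)
 *   VERTEX_BUFFER_STATE (index 32, draw id)   4 dwords  (if uses_drawid)
 *   3DPRIMITIVE                               7 dwords
 * Gfx11+:
 *   3DPRIMITIVE_EXTENDED                     10 dwords
 */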

View File

@@ -0,0 +1,77 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "libintel_shaders.h"
static void end_generated_draws(global void *dst_ptr,
uint32_t item_idx,
uint32_t draw_id, uint32_t draw_count,
uint32_t ring_count, uint32_t max_draw_count,
uint32_t flags,
uint64_t gen_addr, uint64_t end_addr)
{
uint32_t _3dprim_size_B = ((flags >> 16) & 0xff) * 4;
bool indirect_count = (flags & ANV_GENERATED_FLAG_COUNT) != 0;
bool ring_mode = (flags & ANV_GENERATED_FLAG_RING_MODE) != 0;
/* We can have an indirect draw count = 0. */
uint32_t last_draw_id = draw_count == 0 ? 0 : (min(draw_count, max_draw_count) - 1);
global void *jump_dst = draw_count == 0 ? dst_ptr : (dst_ptr + _3dprim_size_B);
if (ring_mode) {
if (draw_id == last_draw_id) {
/* Exit the ring buffer to the next user commands */
genX(write_MI_BATCH_BUFFER_START)(jump_dst, end_addr);
} else if (item_idx == (ring_count - 1)) {
/* Jump back to the generation shader to generate mode draws */
genX(write_MI_BATCH_BUFFER_START)(jump_dst, gen_addr);
}
} else {
if (draw_id == last_draw_id && draw_count < max_draw_count) {
/* Skip forward to the end of the generated draws */
genX(write_MI_BATCH_BUFFER_START)(jump_dst, end_addr);
}
}
}
void
genX(libanv_write_draw)(global void *dst_base,
global void *indirect_base,
global void *draw_id_base,
uint32_t indirect_stride,
global uint32_t *_draw_count,
uint32_t draw_base,
uint32_t instance_multiplier,
uint32_t max_draw_count,
uint32_t flags,
uint32_t ring_count,
uint64_t gen_addr,
uint64_t end_addr,
uint32_t item_idx)
{
uint32_t _3dprim_size_B = ((flags >> 16) & 0xff) * 4;
uint32_t draw_id = draw_base + item_idx;
uint32_t draw_count = *_draw_count;
global void *dst_ptr = dst_base + item_idx * _3dprim_size_B;
global void *indirect_ptr = indirect_base + draw_id * indirect_stride;
global void *draw_id_ptr = draw_id_base + item_idx * 4;
if (draw_id < min(draw_count, max_draw_count)) {
bool is_indexed = (flags & ANV_GENERATED_FLAG_INDEXED) != 0;
bool is_predicated = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0;
bool uses_tbimr = (flags & ANV_GENERATED_FLAG_TBIMR) != 0;
bool uses_base = (flags & ANV_GENERATED_FLAG_BASE) != 0;
bool uses_drawid = (flags & ANV_GENERATED_FLAG_DRAWID) != 0;
uint32_t mocs = (flags >> 8) & 0xff;
genX(write_draw)(dst_ptr, indirect_ptr, draw_id_ptr,
draw_id, instance_multiplier,
is_indexed, is_predicated,
uses_tbimr, uses_base, uses_drawid,
mocs);
}
end_generated_draws(dst_ptr, item_idx, draw_id, draw_count,
ring_count, max_draw_count, flags,
gen_addr, end_addr);
}
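A sketch of the control flow end_generated_draws implements (layout illustrative):
/* Ring mode, ring_count slots per generation pass:
 *   [draw 0][draw 1]...[draw ring_count-1][MI_BATCH_BUFFER_START]
 *   - draw_id == last_draw_id:      jump to end_addr (next user commands)
 *   - item_idx == ring_count - 1:   jump back to gen_addr (regenerate)
 * Direct mode:
 *   - last draw reached with draw_count < max_draw_count:
 *     jump over the remaining pre-allocated slots to end_addr
 * The MI_BATCH_BUFFER_START lands right after the draw's packet
 * (dst_ptr + _3dprim_size_B), or at dst_ptr itself when draw_count == 0.
 */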

View File

@@ -0,0 +1,127 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#ifndef _LIBANV_SHADERS_H_
#define _LIBANV_SHADERS_H_
/* Define stdint types compatible between the CPU and GPU for shared headers */
#ifndef __OPENCL_VERSION__
#include <stdint.h>
#include "util/macros.h"
#else
#define BITFIELD_BIT(i) (1u << (i))
typedef ulong uint64_t;
typedef uint uint32_t;
typedef ushort uint16_t;
typedef uchar uint8_t;
typedef long int64_t;
typedef int int32_t;
typedef short int16_t;
typedef char int8_t;
typedef struct VkDrawIndexedIndirectCommand {
uint32_t indexCount;
uint32_t instanceCount;
uint32_t firstIndex;
int32_t vertexOffset;
uint32_t firstInstance;
} VkDrawIndexedIndirectCommand __attribute__((aligned(4)));
typedef struct VkDrawIndirectCommand {
uint32_t vertexCount;
uint32_t instanceCount;
uint32_t firstVertex;
uint32_t firstInstance;
} VkDrawIndirectCommand __attribute__((aligned(4)));
#include "genxml/gen_macros.h"
#include "genxml/genX_cl_pack.h"
#endif
/**
* Flags for generated_draws.cl
*/
#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
/* Only used on Gfx9, means the pipeline is using gl_DrawID */
#define ANV_GENERATED_FLAG_DRAWID BITFIELD_BIT(2)
/* Only used on Gfx9, means the pipeline is using gl_BaseVertex or
* gl_BaseInstance
*/
#define ANV_GENERATED_FLAG_BASE BITFIELD_BIT(3)
/* Whether the count is indirect */
#define ANV_GENERATED_FLAG_COUNT BITFIELD_BIT(4)
/* Whether the generation shader writes to the ring buffer */
#define ANV_GENERATED_FLAG_RING_MODE BITFIELD_BIT(5)
/* Whether TBIMR tile-based rendering shall be enabled. */
#define ANV_GENERATED_FLAG_TBIMR BITFIELD_BIT(6)
/**
* Flags for query_copy.cl
*/
#define ANV_COPY_QUERY_FLAG_RESULT64 BITFIELD_BIT(0)
#define ANV_COPY_QUERY_FLAG_AVAILABLE BITFIELD_BIT(1)
#define ANV_COPY_QUERY_FLAG_DELTA BITFIELD_BIT(2)
#define ANV_COPY_QUERY_FLAG_PARTIAL BITFIELD_BIT(3)
#ifdef __OPENCL_VERSION__
void genX(write_3DSTATE_VERTEX_BUFFERS)(global void *dst_ptr,
uint32_t buffer_count);
void genX(write_VERTEX_BUFFER_STATE)(global void *dst_ptr,
uint32_t mocs,
uint32_t buffer_idx,
uint64_t address,
uint32_t size,
uint32_t stride);
#if GFX_VER == 9
void genX(write_3DPRIMITIVE)(global void *dst_ptr,
bool is_predicated,
bool is_indexed,
bool uses_tbimr,
uint32_t vertex_count_per_instance,
uint32_t start_vertex_location,
uint32_t instance_count,
uint32_t start_instance_location,
uint32_t base_vertex_location);
#endif
#if GFX_VER >= 11
void genX(write_3DPRIMITIVE_EXTENDED)(global void *dst_ptr,
bool is_predicated,
bool is_indexed,
bool uses_tbimr,
uint32_t vertex_count_per_instance,
uint32_t start_vertex_location,
uint32_t instance_count,
uint32_t start_instance_location,
uint32_t base_vertex_location,
uint32_t param_base_vertex,
uint32_t param_base_instance,
uint32_t param_draw_id);
#endif
void genX(write_MI_BATCH_BUFFER_START)(global void *dst_ptr, uint64_t addr);
void genX(write_draw)(global uint32_t *dst_ptr,
global void *indirect_ptr,
global uint32_t *draw_id_ptr,
uint32_t draw_id,
uint32_t instance_multiplier,
bool is_indexed,
bool is_predicated,
bool uses_tbimr,
bool uses_base,
bool uses_drawid,
uint32_t mocs);
#endif /* __OPENCL_VERSION__ */
#endif /* _LIBANV_SHADERS_H_ */

View File

@@ -0,0 +1,23 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
 */
#include "libintel_shaders.h"
void
genX(libanv_memcpy)(global void *dst_base,
global void *src_base,
uint num_dwords,
uint dword_offset)
{
global void *dst = dst_base + 4 * dword_offset;
global void *src = src_base + 4 * dword_offset;
if (dword_offset + 4 <= num_dwords) {
*(global uint4 *)(dst) = *(global uint4 *)(src);
} else if (dword_offset + 3 <= num_dwords) {
*(global uint3 *)(dst) = *(global uint3 *)(src);
} else if (dword_offset + 2 <= num_dwords) {
*(global uint2 *)(dst) = *(global uint2 *)(src);
} else if (dword_offset + 1 <= num_dwords) {
*(global uint *)(dst) = *(global uint *)(src);
}
}
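How this kernel is driven (see the dispatch code later in this commit): each invocation handles up to four dwords, so a sketch of the calling convention is:
/* invocations = DIV_ROUND_UP(num_dwords, 4);
 * genX(libanv_memcpy)(dst, src, num_dwords, invocation_index * 4);
 *
 * e.g. a 10-dword copy runs 3 invocations copying 4 + 4 + 2 dwords;
 * the if/else ladder above handles the sub-uint4 tail.
 */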

View File

@@ -0,0 +1,63 @@
# Copyright © 2023 Intel Corporation
# SPDX-License-Identifier: MIT
intel_float64_spv_h = custom_target(
'float64_spv.h',
input : [glsl2spirv, float64_glsl_file],
output : 'float64_spv.h',
command : [
prog_python, '@INPUT@', '@OUTPUT@',
prog_glslang,
'--create-entry', 'main',
'--vn', 'float64_spv_source',
'--glsl-version', '450',
'-Olib',
]
)
intel_shader_files = files(
'libintel_shaders.h',
'generate.cl',
'generate_draws.cl',
'memcpy.cl',
'query_copy.cl',
)
prepended_input_args = []
foreach input_arg : intel_shader_files
prepended_input_args += ['--in', input_arg]
endforeach
intel_shaders_gens = [ [ 90, 9],
[110, 11],
[120, 12],
[125, 125],
[200, 20] ]
intel_shaders = []
foreach gen : intel_shaders_gens
intel_shaders += custom_target(
'intel_gfx@0@_shaders_code.h'.format(gen[1]),
input : intel_shader_files,
output : 'intel_gfx@0@_shaders_code.h'.format(gen[1]),
command : [
prog_intel_clc, '--nir',
'--prefix', 'gfx@0@_intel_shaders'.format(gen[1]),
prepended_input_args, '-o', '@OUTPUT@', '--',
'-cl-std=cl2.0', '-D__OPENCL_VERSION__=200',
'-DGFX_VERx10=@0@'.format(gen[0]),
'-I' + join_paths(meson.current_source_dir(), '.'),
'-I' + join_paths(meson.source_root(), 'src'),
'-I' + join_paths(meson.source_root(), 'src/intel'),
'-I' + join_paths(meson.build_root(), 'src/intel'),
'-I' + join_paths(meson.source_root(), 'src/intel/genxml'),
'-include', 'opencl-c.h',
],
env: ['MESA_SHADER_CACHE_DISABLE=true'],
depends : [dep_prog_intel_clc, gen_cl_xml_pack],
)
endforeach
idep_intel_shaders = declare_dependency(
sources : intel_shaders,
include_directories : include_directories('.'),
)
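Each custom_target above emits a header exposing the serialized NIR library, consumed by genX_internal_kernels.c below. A sketch of the expected shape (the exact symbol name comes from the --prefix option and intel_clc's output format, so treat this as an assumption):
/* intel_gfx11_shaders_code.h (generated, hypothetical sketch):
 * static const uint32_t gfx11_intel_shaders_nir[] = {
 *    ... serialized NIR blob, deserialized at device creation ...
 * };
 */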

View File

@@ -0,0 +1,72 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
void
genX(libanv_query_copy)(global void *destination_base,
uint32_t destination_stride,
global void *query_data,
uint32_t first_query,
uint32_t num_queries,
uint32_t query_data_offset,
uint32_t query_stride,
uint32_t num_query_items,
uint32_t copy_flags,
uint32_t copy_item_idx)
{
if (copy_item_idx >= num_queries)
return;
bool is_result64 = (copy_flags & ANV_COPY_QUERY_FLAG_RESULT64) != 0;
bool write_available = (copy_flags & ANV_COPY_QUERY_FLAG_AVAILABLE) != 0;
bool compute_delta = (copy_flags & ANV_COPY_QUERY_FLAG_DELTA) != 0;
bool partial_result = (copy_flags & ANV_COPY_QUERY_FLAG_PARTIAL) != 0;
uint query_byte = (first_query + copy_item_idx) * query_stride;
uint query_data_byte = query_byte + query_data_offset;
global uint64_t *query = query_data + (first_query + copy_item_idx) * query_stride;
global uint64_t *dest64 = destination_base + copy_item_idx * destination_stride;
global uint32_t *dest32 = destination_base + copy_item_idx * destination_stride;
uint64_t availability = *(global uint32_t *)(query_data + query_byte);
if (write_available) {
if (is_result64)
dest64[num_query_items] = availability;
else
dest32[num_query_items] = availability;
}
for (uint32_t i = 0; i < num_query_items; i++) {
uint32_t qw_offset = 1 + i * 2;
uint64_t v;
if (compute_delta) {
struct delta64 {
uint64_t v0;
uint64_t v1;
} data = *((global struct delta64 *)&query[qw_offset]);
v = data.v1 - data.v0;
} else {
v = query[qw_offset + 0];
}
/* vkCmdCopyQueryPoolResults:
*
* "If VK_QUERY_RESULT_PARTIAL_BIT is set, then for any query that is
* unavailable, an intermediate result between zero and the final
* result value is written for that query."
*
* We write 0 for values that have not been written yet; we can't really
* provide any sensible value.
*/
if (partial_result && availability == 0)
v = 0;
if (is_result64)
dest64[i] = v;
else
dest32[i] = v;
}
}
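For reference, the destination layout this produces for a query with num_query_items == 2 and 64-bit results:
/* dest64[0] = item 0 result
 * dest64[1] = item 1 result
 * dest64[2] = availability  (only with ANV_COPY_QUERY_FLAG_AVAILABLE)
 */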

View File

@@ -39,6 +39,9 @@
struct intel_sample_positions;
struct intel_urb_config;
typedef struct nir_builder nir_builder;
typedef struct nir_shader nir_shader;
extern const uint32_t genX(vk_to_intel_cullmode)[];
extern const uint32_t genX(vk_to_intel_front_face)[];
@@ -59,6 +62,11 @@ VkResult genX(init_device_state)(struct anv_device *device);
void genX(init_cps_device_state)(struct anv_device *device);
nir_shader *genX(load_libanv_shader)(struct anv_device *device, void *mem_ctx);
uint32_t genX(call_internal_shader)(nir_builder *b,
enum anv_internal_kernel_name shader_name);
void
genX(set_fast_clear_state)(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,

View File

@@ -25,7 +25,8 @@
#include "compiler/brw_compiler.h"
#include "compiler/brw_nir.h"
#include "compiler/spirv/nir_spirv.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "dev/intel_debug.h"
#include "util/macros.h"
@@ -33,78 +34,6 @@
#include "anv_internal_kernels.h"
#include "shaders/generated_draws_spv.h"
#include "shaders/query_copy_compute_spv.h"
#include "shaders/query_copy_fragment_spv.h"
#include "shaders/memcpy_compute_spv.h"
static bool
lower_vulkan_descriptors_instr(nir_builder *b, nir_intrinsic_instr *intrin,
void *cb_data)
{
if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
return false;
nir_instr *res_index_instr = intrin->src[0].ssa->parent_instr;
assert(res_index_instr->type == nir_instr_type_intrinsic);
nir_intrinsic_instr *res_index_intrin =
nir_instr_as_intrinsic(res_index_instr);
assert(res_index_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
b->cursor = nir_after_instr(&intrin->instr);
const struct anv_internal_kernel_bind_map *bind_map = cb_data;
uint32_t binding = nir_intrinsic_binding(res_index_intrin);
assert(binding < bind_map->num_bindings);
nir_def *desc_value = NULL;
if (bind_map->bindings[binding].push_constant) {
desc_value =
nir_vec2(b,
nir_imm_int(b, binding),
nir_imm_int(b, 0));
} else {
int push_constant_binding = -1;
for (uint32_t i = 0; i < bind_map->num_bindings; i++) {
if (bind_map->bindings[i].push_constant) {
push_constant_binding = i;
break;
}
}
assert(push_constant_binding != -1);
desc_value =
nir_load_ubo(b, 1, 64,
nir_imm_int(b, push_constant_binding),
nir_imm_int(b,
bind_map->bindings[binding].address_offset),
.align_mul = 8,
.align_offset = 0,
.range_base = 0,
.range = ~0);
desc_value =
nir_vec4(b,
nir_unpack_64_2x32_split_x(b, desc_value),
nir_unpack_64_2x32_split_y(b, desc_value),
nir_imm_int(b, 0),
nir_imm_int(b, 0));
}
nir_def_rewrite_uses(&intrin->def, desc_value);
return true;
}
static bool
lower_vulkan_descriptors(nir_shader *shader,
const struct anv_internal_kernel_bind_map *bind_map)
{
return nir_shader_intrinsics_pass(shader, lower_vulkan_descriptors_instr,
nir_metadata_block_index |
nir_metadata_dominance,
(void *)bind_map);
}
static bool
lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
UNUSED void *data)
@@ -117,60 +46,44 @@ lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
return true;
}
static bool
lower_load_ubo_to_uniforms(nir_builder *b, nir_intrinsic_instr *intrin,
void *cb_data)
static void
link_libanv(nir_shader *nir, const nir_shader *libanv)
{
if (intrin->intrinsic != nir_intrinsic_load_ubo)
return false;
b->cursor = nir_instr_remove(&intrin->instr);
nir_def_rewrite_uses(
&intrin->def,
nir_load_uniform(b,
intrin->def.num_components,
intrin->def.bit_size,
intrin->src[1].ssa,
.base = 0,
.range = intrin->def.num_components *
intrin->def.bit_size / 8));
return true;
nir_link_shader_functions(nir, libanv);
NIR_PASS_V(nir, nir_inline_functions);
NIR_PASS_V(nir, nir_remove_non_entrypoints);
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
glsl_get_cl_type_size_align);
NIR_PASS_V(nir, nir_opt_deref);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_lower_explicit_io,
nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
nir_var_mem_global,
nir_address_format_62bit_generic);
}
static struct anv_shader_bin *
compile_upload_spirv(struct anv_device *device,
gl_shader_stage stage,
const char *name,
const void *hash_key,
uint32_t hash_key_size,
const struct anv_internal_kernel_bind_map *bind_map,
const uint32_t *spirv_source,
uint32_t spirv_source_size,
uint32_t sends_count_expectation)
compile_shader(struct anv_device *device,
const nir_shader *libanv,
enum anv_internal_kernel_name shader_name,
gl_shader_stage stage,
const char *name,
const void *hash_key,
uint32_t hash_key_size,
uint32_t sends_count_expectation)
{
struct spirv_to_nir_options spirv_options = {
.caps = {
.int64 = true,
},
.ubo_addr_format = nir_address_format_32bit_index_offset,
.ssbo_addr_format = nir_address_format_64bit_global_32bit_offset,
.environment = NIR_SPIRV_VULKAN,
.create_library = false,
};
const nir_shader_compiler_options *nir_options =
device->physical->compiler->nir_options[stage];
nir_shader* nir =
vk_spirv_to_nir(&device->vk, spirv_source, spirv_source_size * 4,
stage, "main", 0, NULL, &spirv_options,
nir_options, true /* internal */,
NULL);
nir_builder b = nir_builder_init_simple_shader(stage, nir_options,
"%s", name);
assert(nir != NULL);
uint32_t uniform_size =
anv_genX(device->info, call_internal_shader)(&b, shader_name);
nir->info.name = ralloc_strdup(nir, name);
nir_shader *nir = b.shader;
link_libanv(nir, libanv);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_opt_cse);
@@ -182,6 +95,12 @@ compile_upload_spirv(struct anv_device *device,
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_split_per_member_structs);
if (stage == MESA_SHADER_COMPUTE) {
nir->info.workgroup_size[0] = 16;
nir->info.workgroup_size[1] = 1;
nir->info.workgroup_size[2] = 1;
}
struct brw_compiler *compiler = device->physical->compiler;
struct brw_nir_compiler_opts opts = {};
brw_preprocess_nir(compiler, nir, &opts);
@@ -205,25 +124,12 @@ compile_upload_spirv(struct anv_device *device,
nir_metadata_block_index | nir_metadata_dominance, NULL);
}
/* Reset sizes before gathering information */
nir->global_mem_size = 0;
nir->scratch_size = 0;
nir->info.shared_size = 0;
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
/* Do vectorizing here. For some reason when trying to do it in the backend
* this just isn't working.
*/
nir_load_store_vectorize_options options = {
.modes = nir_var_mem_ubo | nir_var_mem_ssbo,
.callback = brw_nir_should_vectorize_mem,
.robust_modes = (nir_variable_mode)0,
};
NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
NIR_PASS_V(nir, lower_vulkan_descriptors, bind_map);
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
nir_address_format_32bit_index_offset);
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
nir_address_format_64bit_global_32bit_offset);
NIR_PASS_V(nir, nir_copy_prop);
NIR_PASS_V(nir, nir_opt_constant_folding);
NIR_PASS_V(nir, nir_opt_dce);
@@ -235,14 +141,22 @@ compile_upload_spirv(struct anv_device *device,
memset(&prog_data, 0, sizeof(prog_data));
if (stage == MESA_SHADER_COMPUTE) {
NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_load_ubo_to_uniforms,
nir_metadata_block_index | nir_metadata_dominance,
NULL);
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, device->info,
&prog_data.cs);
nir->num_uniforms = bind_map->push_data_size;
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics,
device->info, &prog_data.cs);
}
/* Do vectorizing here. For some reason when trying to do it in the backend
* this just isn't working.
*/
nir_load_store_vectorize_options options = {
.modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global,
.callback = brw_nir_should_vectorize_mem,
.robust_modes = (nir_variable_mode)0,
};
NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
nir->num_uniforms = uniform_size;
prog_data.base.nr_params = nir->num_uniforms / 4;
brw_nir_analyze_ubo_ranges(compiler, nir, prog_data.base.ubo_ranges);
@@ -270,18 +184,21 @@ compile_upload_spirv(struct anv_device *device,
assert(stats[stat_idx].spills == 0);
assert(stats[stat_idx].fills == 0);
assert(stats[stat_idx].sends == sends_count_expectation);
assert(stats[stat_idx].spills == 0);
stat_idx++;
}
if (prog_data.wm.dispatch_16) {
assert(stats[stat_idx].spills == 0);
assert(stats[stat_idx].fills == 0);
assert(stats[stat_idx].sends == sends_count_expectation);
assert(stats[stat_idx].spills == 0);
stat_idx++;
}
if (prog_data.wm.dispatch_32) {
assert(stats[stat_idx].spills == 0);
assert(stats[stat_idx].fills == 0);
assert(stats[stat_idx].sends == sends_count_expectation * 2);
assert(stats[stat_idx].spills == 0);
stat_idx++;
}
} else {
@@ -304,6 +221,8 @@ compile_upload_spirv(struct anv_device *device,
assert(stats.sends == sends_count_expectation);
}
assert(prog_data.base.total_scratch == 0);
struct anv_pipeline_bind_map empty_bind_map = {};
struct anv_push_descriptor_info empty_push_desc_info = {};
struct anv_shader_upload_params upload_params = {
@@ -336,6 +255,11 @@ anv_device_init_internal_kernels(struct anv_device *device)
false /* needs_slm */);
device->internal_kernels_l3_config = intel_get_l3_config(device->info, w);
void *mem_ctx = ralloc_context(NULL);
nir_shader *libanv_shaders =
anv_genX(device->info, load_libanv_shader)(device, mem_ctx);
const struct {
struct {
char name[40];
@@ -343,134 +267,54 @@ anv_device_init_internal_kernels(struct anv_device *device)
gl_shader_stage stage;
const uint32_t *spirv_data;
uint32_t spirv_size;
uint32_t send_count;
struct anv_internal_kernel_bind_map bind_map;
} internal_kernels[] = {
[ANV_INTERNAL_KERNEL_GENERATED_DRAWS] = {
.key = {
.name = "anv-generated-indirect-draws",
},
.stage = MESA_SHADER_FRAGMENT,
.spirv_data = generated_draws_spv_source,
.spirv_size = ARRAY_SIZE(generated_draws_spv_source),
.send_count = /* 2 * (2 loads + 3 stores) + ** gfx11 **
* 2 * (2 loads + 6 stores) + ** gfx9 **
* 1 load + 3 store
*/ 29,
.bind_map = {
.num_bindings = 5,
.bindings = {
{
.address_offset = offsetof(struct anv_generated_indirect_params,
indirect_data_addr),
},
{
.address_offset = offsetof(struct anv_generated_indirect_params,
generated_cmds_addr),
},
{
.address_offset = offsetof(struct anv_generated_indirect_params,
draw_ids_addr),
},
{
.address_offset = offsetof(struct anv_generated_indirect_params,
draw_count_addr),
},
{
.push_constant = true,
},
},
.push_data_size = sizeof(struct anv_generated_indirect_params),
},
.send_count = (device->info->ver == 9 ?
/* 1 load +
* 4 stores +
* 2 * (2 loads + 2 stores) +
* 3 stores
*/
16 :
/* 1 load +
* 2 * (2 loads + 3 stores) +
* 3 stores
*/
14),
},
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE] = {
.key = {
.name = "anv-copy-query-compute",
},
.stage = MESA_SHADER_COMPUTE,
.spirv_data = query_copy_compute_spv_source,
.spirv_size = ARRAY_SIZE(query_copy_compute_spv_source),
.send_count = device->info->verx10 >= 125 ?
9 /* 4 loads + 4 stores + 1 EOT */ :
8 /* 3 loads + 4 stores + 1 EOT */,
.bind_map = {
.num_bindings = 3,
.bindings = {
{
.address_offset = offsetof(struct anv_query_copy_params,
query_data_addr),
},
{
.address_offset = offsetof(struct anv_query_copy_params,
destination_addr),
},
{
.push_constant = true,
},
},
.push_data_size = sizeof(struct anv_query_copy_params),
},
},
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT] = {
.key = {
.name = "anv-copy-query-fragment",
},
.stage = MESA_SHADER_FRAGMENT,
.spirv_data = query_copy_fragment_spv_source,
.spirv_size = ARRAY_SIZE(query_copy_fragment_spv_source),
.send_count = 8 /* 3 loads + 4 stores + 1 EOT */,
.bind_map = {
.num_bindings = 3,
.bindings = {
{
.address_offset = offsetof(struct anv_query_copy_params,
query_data_addr),
},
{
.address_offset = offsetof(struct anv_query_copy_params,
destination_addr),
},
{
.push_constant = true,
},
},
.push_data_size = sizeof(struct anv_query_copy_params),
},
},
[ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE] = {
.key = {
.name = "anv-memcpy-compute",
},
.stage = MESA_SHADER_COMPUTE,
.spirv_data = memcpy_compute_spv_source,
.spirv_size = ARRAY_SIZE(memcpy_compute_spv_source),
.send_count = device->info->verx10 >= 125 ?
10 /* 5 loads (1 pull constants) + 4 stores + 1 EOT */ :
9 /* 4 loads + 4 stores + 1 EOT */,
.bind_map = {
.num_bindings = 3,
.bindings = {
{
.address_offset = offsetof(struct anv_memcpy_params,
src_addr),
},
{
.address_offset = offsetof(struct anv_memcpy_params,
dst_addr),
},
{
.push_constant = true,
},
},
.push_data_size = sizeof(struct anv_memcpy_params),
},
},
};
VkResult result = VK_SUCCESS;
for (uint32_t i = 0; i < ARRAY_SIZE(internal_kernels); i++) {
device->internal_kernels[i] =
anv_device_search_for_kernel(device,
@@ -480,18 +324,19 @@ anv_device_init_internal_kernels(struct anv_device *device)
NULL);
if (device->internal_kernels[i] == NULL) {
device->internal_kernels[i] =
compile_upload_spirv(device,
internal_kernels[i].stage,
internal_kernels[i].key.name,
&internal_kernels[i].key,
sizeof(internal_kernels[i].key),
&internal_kernels[i].bind_map,
internal_kernels[i].spirv_data,
internal_kernels[i].spirv_size,
internal_kernels[i].send_count);
compile_shader(device,
libanv_shaders,
i,
internal_kernels[i].stage,
internal_kernels[i].key.name,
&internal_kernels[i].key,
sizeof(internal_kernels[i].key),
internal_kernels[i].send_count);
}
if (device->internal_kernels[i] == NULL) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto error;
}
if (device->internal_kernels[i] == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
/* The cache already has a reference and it's not going anywhere so
* there is no need to hold a second reference.
@@ -499,7 +344,8 @@ anv_device_init_internal_kernels(struct anv_device *device)
anv_shader_bin_unref(device, device->internal_kernels[i]);
}
return VK_SUCCESS;
error:
return result;
}
void

View File

@@ -24,21 +24,51 @@
#ifndef ANV_GENERATED_INDIRECT_DRAWS_H
#define ANV_GENERATED_INDIRECT_DRAWS_H
#include "shaders/interface.h"
#include "libintel_shaders.h"
struct PACKED anv_generated_indirect_params {
struct anv_generated_indirect_draw_params draw;
struct PACKED anv_gen_indirect_params {
/* Draw ID buffer address (only used on Gfx9) */
uint64_t draw_id_addr;
/* Global address of binding 0 */
/* Indirect data buffer address (only used on Gfx9) */
uint64_t indirect_data_addr;
/* Global address of binding 1 */
/* Stride between each element of the indirect data buffer */
uint32_t indirect_data_stride;
uint32_t flags; /* 0-7: bits, 8-15: mocs, 16-23: cmd_dws */
/* Base draw ID; it is added to the index computed from gl_FragCoord */
uint32_t draw_base;
/* Maximum number of draws (equal to draw_count for indirect draws without
* an indirect count)
*/
uint32_t max_draw_count;
/* Number of draws to generate in the ring buffer (only useful in ring
* buffer mode)
*/
uint32_t ring_count;
/* Instance multiplier for multi view */
uint32_t instance_multiplier;
/* Address to jump to in order to generate further draws (used in ring
* mode)
*/
uint64_t gen_addr;
/* Address to jump to after the generated draws (only used with indirect
* draw count variants)
*/
uint64_t end_addr;
/* Destination of the generated draw commands */
uint64_t generated_cmds_addr;
/* Global address of binding 2 */
uint64_t draw_ids_addr;
/* Global address of binding 3 (points to the draw_count field above) */
/* Draw count address (in some cases points to the draw_count field below) */
uint64_t draw_count_addr;
/* Draw count value for non-count variants of draw indirect commands */
@@ -48,31 +78,53 @@ struct PACKED anv_generated_indirect_params {
* split into smaller chunks, see while loop in
* genX(cmd_buffer_emit_indirect_generated_draws)
*/
struct anv_generated_indirect_params *prev;
struct anv_gen_indirect_params *prev;
};
struct PACKED anv_query_copy_params {
struct anv_query_copy_shader_params copy;
/* ANV_COPY_QUERY_FLAG_* flags */
uint32_t flags;
/* Number of queries to copy */
uint32_t num_queries;
/* Number of items to write back in the results per query */
uint32_t num_items;
/* First query to copy result from */
uint32_t query_base;
/* Query stride in bytes */
uint32_t query_stride;
/* Offset at which the data should be read from */
uint32_t query_data_offset;
/* Stride of destination writes */
uint32_t destination_stride;
/* We need to be 64-bit aligned, or 32-bit builds get
* very unhappy.
*/
uint32_t padding;
/* Address of the query pool */
uint64_t query_data_addr;
/* Destination address of the results */
uint64_t destination_addr;
};
/* This needs to match memcpy_compute.glsl :
*
* layout(set = 0, binding = 2) uniform block
*/
struct PACKED anv_memcpy_shader_params {
uint32_t num_dwords;
uint32_t pad;
};
struct PACKED anv_memcpy_params {
struct anv_memcpy_shader_params copy;
/* Number of dwords to copy */
uint32_t num_dwords;
uint32_t pad;
/* Source address of the copy */
uint64_t src_addr;
/* Destination address of the copy */
uint64_t dst_addr;
};
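A worked example of the packed flags layout noted above (values illustrative):
/* Indexed draw, MOCS 0x4, 10-dword draw command:
 *   flags = ANV_GENERATED_FLAG_INDEXED | (0x4 << 8) | (10 << 16);
 * decoded on the GPU side (see generated_draws.cl):
 *   mocs   = (flags >> 8) & 0xff;          -> 0x4
 *   size_B = ((flags >> 16) & 0xff) * 4;   -> 40 bytes
 */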

View File

@@ -1635,20 +1635,6 @@ enum anv_internal_kernel_name {
ANV_INTERNAL_KERNEL_COUNT,
};
struct anv_internal_kernel_bind_map {
uint32_t num_bindings;
struct {
/* Whether this binding is provided through push constants */
bool push_constant;
/* When not provided by push constants, this is offset at which the
* 64bit address of the binding is located in the push constant data.
*/
uint32_t address_offset;
} bindings[5];
uint32_t push_data_size;
};
enum anv_rt_bvh_build_method {
ANV_BVH_BUILD_METHOD_TRIVIAL,
ANV_BVH_BUILD_METHOD_NEW_SAH,

View File

@@ -138,15 +138,13 @@ anv_device_utrace_emit_cs_copy_ts_buffer(struct u_trace_context *utctx,
struct anv_memcpy_params *params = push_data_state.map;
*params = (struct anv_memcpy_params) {
.copy = {
.num_dwords = count * sizeof(union anv_utrace_timestamp) / 4,
},
.src_addr = anv_address_physical(from_addr),
.dst_addr = anv_address_physical(to_addr),
.num_dwords = count * sizeof(union anv_utrace_timestamp) / 4,
.src_addr = anv_address_physical(from_addr),
.dst_addr = anv_address_physical(to_addr),
};
anv_genX(device->info, emit_simple_shader_dispatch)(
&submit->simple_state, DIV_ROUND_UP(params->copy.num_dwords, 4),
&submit->simple_state, DIV_ROUND_UP(params->num_dwords, 4),
push_data_state);
}

View File

@@ -60,7 +60,7 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
struct anv_state push_data_state =
genX(simple_shader_alloc_push)(simple_state,
sizeof(struct anv_generated_indirect_params));
sizeof(struct anv_gen_indirect_params));
if (push_data_state.map == NULL)
return ANV_STATE_NULL;
@@ -73,42 +73,37 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
if (anv_address_is_null(count_addr)) {
draw_count_addr = anv_address_add(
genX(simple_shader_push_state_address)(simple_state, push_data_state),
offsetof(struct anv_generated_indirect_params, draw_count));
offsetof(struct anv_gen_indirect_params, draw_count));
} else {
draw_count_addr = count_addr;
}
struct anv_generated_indirect_params *push_data = push_data_state.map;
*push_data = (struct anv_generated_indirect_params) {
.draw = {
.draw_id_addr = anv_address_physical(draw_id_addr),
.indirect_data_addr = anv_address_physical(indirect_data_addr),
.indirect_data_stride = indirect_data_stride,
.flags = (use_tbimr ? ANV_GENERATED_FLAG_TBIMR : 0) |
(indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
(cmd_buffer->state.conditional_render_enabled ?
ANV_GENERATED_FLAG_PREDICATED : 0) |
((vs_prog_data->uses_firstvertex ||
vs_prog_data->uses_baseinstance) ?
ANV_GENERATED_FLAG_BASE : 0) |
(vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) |
(anv_mocs(device, indirect_data_addr.bo,
ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
(!anv_address_is_null(count_addr) ?
ANV_GENERATED_FLAG_COUNT : 0) |
(ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0) |
((generated_cmd_stride / 4) << 16) |
device->info->ver << 24,
.draw_base = item_base,
.max_draw_count = max_count,
.ring_count = ring_count,
.instance_multiplier = pipeline->instance_multiplier,
},
.draw_count = anv_address_is_null(count_addr) ? max_count : 0,
.indirect_data_addr = anv_address_physical(indirect_data_addr),
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
.draw_ids_addr = anv_address_physical(draw_id_addr),
.draw_count_addr = anv_address_physical(draw_count_addr),
struct anv_gen_indirect_params *push_data = push_data_state.map;
*push_data = (struct anv_gen_indirect_params) {
.draw_id_addr = anv_address_physical(draw_id_addr),
.indirect_data_addr = anv_address_physical(indirect_data_addr),
.indirect_data_stride = indirect_data_stride,
.flags = (use_tbimr ? ANV_GENERATED_FLAG_TBIMR : 0) |
(indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
(cmd_buffer->state.conditional_render_enabled ?
ANV_GENERATED_FLAG_PREDICATED : 0) |
((vs_prog_data->uses_firstvertex ||
vs_prog_data->uses_baseinstance) ?
ANV_GENERATED_FLAG_BASE : 0) |
(vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) |
(anv_mocs(device, indirect_data_addr.bo,
ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
(!anv_address_is_null(count_addr) ?
ANV_GENERATED_FLAG_COUNT : 0) |
(ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0) |
((generated_cmd_stride / 4) << 16),
.draw_base = item_base,
.max_draw_count = max_count,
.ring_count = ring_count,
.instance_multiplier = pipeline->instance_multiplier,
.draw_count = anv_address_is_null(count_addr) ? max_count : 0,
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
.draw_count_addr = anv_address_physical(draw_count_addr),
};
genX(emit_simple_shader_dispatch)(simple_state, item_count, push_data_state);
@@ -212,7 +207,7 @@ genX(cmd_buffer_get_generated_draw_stride)(struct anv_cmd_buffer *cmd_buffer)
static void
genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
struct anv_generated_indirect_params *params)
struct anv_gen_indirect_params *params)
{
/* We don't know the end_addr until we have emitted all the generation
* draws. Go and edit the address of all the push parameters.
@@ -220,7 +215,7 @@ genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
uint64_t end_addr =
anv_address_physical(anv_batch_current_address(&cmd_buffer->batch));
while (params != NULL) {
params->draw.end_addr = end_addr;
params->end_addr = end_addr;
params = params->prev;
}
}
@@ -295,7 +290,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inplace)(struct anv_cmd_buffer *cm
const uint32_t draw_cmd_stride =
genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);
struct anv_generated_indirect_params *last_params = NULL;
struct anv_gen_indirect_params *last_params = NULL;
uint32_t item_base = 0;
while (item_base < max_draw_count) {
const uint32_t item_count = MIN2(max_draw_count - item_base,
@@ -329,7 +324,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inplace)(struct anv_cmd_buffer *cm
max_draw_count,
indexed,
0 /* ring_count */);
struct anv_generated_indirect_params *params = params_state.map;
struct anv_gen_indirect_params *params = params_state.map;
if (params == NULL)
return;
@@ -498,7 +493,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
max_draw_count,
indexed,
ring_count);
struct anv_generated_indirect_params *params = params_state.map;
struct anv_gen_indirect_params *params = params_state.map;
anv_add_pending_pipe_bits(cmd_buffer,
#if GFX_VER == 9
@@ -561,7 +556,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
struct anv_address draw_base_addr = anv_address_add(
genX(simple_shader_push_state_address)(
&simple_state, params_state),
offsetof(struct anv_generated_indirect_params, draw.draw_base));
offsetof(struct anv_gen_indirect_params, draw_base));
const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device,
&draw_base_addr);
@@ -594,8 +589,8 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,
"after generated draws end");
params->draw.gen_addr = anv_address_physical(inc_addr);
params->draw.end_addr = anv_address_physical(end_addr);
params->gen_addr = anv_address_physical(inc_addr);
params->end_addr = anv_address_physical(end_addr);
}
}

View File

@@ -0,0 +1,111 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "anv_private.h"
#include "anv_internal_kernels.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#if GFX_VERx10 == 90
# include "intel_gfx9_shaders_code.h"
#elif GFX_VERx10 == 110
# include "intel_gfx11_shaders_code.h"
#elif GFX_VERx10 == 120
# include "intel_gfx12_shaders_code.h"
#elif GFX_VERx10 == 125
# include "intel_gfx125_shaders_code.h"
#elif GFX_VERx10 == 200
# include "intel_gfx20_shaders_code.h"
#else
# error "Unsupported generation"
#endif
#include "genxml/gen_macros.h"
#define load_param(b, bit_size, struct_name, field_name) \
nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0), \
.base = offsetof(struct_name, field_name), \
.range = bit_size / 8)
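A worked expansion of the macro above, using one of the calls below:
/* load_param(b, 64, struct anv_memcpy_params, src_addr) becomes:
 *   nir_load_uniform(b, 1, 64, nir_imm_int(b, 0),
 *                    .base = offsetof(struct anv_memcpy_params, src_addr),
 *                    .range = 8);
 * i.e. each parameter is read from the push-constant data at its
 * struct offset.
 */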
static nir_def *
load_fragment_index(nir_builder *b)
{
nir_def *pos_in = nir_f2i32(b, nir_trim_vector(b, nir_load_frag_coord(b), 2));
return nir_iadd(b,
nir_imul_imm(b, nir_channel(b, pos_in, 1), 8192),
nir_channel(b, pos_in, 0));
}
static nir_def *
load_compute_index(nir_builder *b)
{
return nir_channel(b, nir_load_global_invocation_id_zero_base(b, 32), 0);
}
nir_shader *
genX(load_libanv_shader)(struct anv_device *device, void *mem_ctx)
{
const nir_shader_compiler_options *nir_options =
device->physical->compiler->nir_options[MESA_SHADER_KERNEL];
struct blob_reader blob;
blob_reader_init(&blob, (void *)genX(intel_shaders_nir),
sizeof(genX(intel_shaders_nir)));
return nir_deserialize(mem_ctx, nir_options, &blob);
}
uint32_t
genX(call_internal_shader)(nir_builder *b, enum anv_internal_kernel_name shader_name)
{
switch (shader_name) {
case ANV_INTERNAL_KERNEL_GENERATED_DRAWS:
genX(libanv_write_draw)(
b,
load_param(b, 64, struct anv_gen_indirect_params, generated_cmds_addr),
load_param(b, 64, struct anv_gen_indirect_params, indirect_data_addr),
load_param(b, 64, struct anv_gen_indirect_params, draw_id_addr),
load_param(b, 32, struct anv_gen_indirect_params, indirect_data_stride),
load_param(b, 64, struct anv_gen_indirect_params, draw_count_addr),
load_param(b, 32, struct anv_gen_indirect_params, draw_base),
load_param(b, 32, struct anv_gen_indirect_params, instance_multiplier),
load_param(b, 32, struct anv_gen_indirect_params, max_draw_count),
load_param(b, 32, struct anv_gen_indirect_params, flags),
load_param(b, 32, struct anv_gen_indirect_params, ring_count),
load_param(b, 64, struct anv_gen_indirect_params, gen_addr),
load_param(b, 64, struct anv_gen_indirect_params, end_addr),
load_fragment_index(b));
return sizeof(struct anv_gen_indirect_params);
case ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE:
case ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT:
genX(libanv_query_copy)(
b,
load_param(b, 64, struct anv_query_copy_params, destination_addr),
load_param(b, 32, struct anv_query_copy_params, destination_stride),
load_param(b, 64, struct anv_query_copy_params, query_data_addr),
load_param(b, 32, struct anv_query_copy_params, query_base),
load_param(b, 32, struct anv_query_copy_params, num_queries),
load_param(b, 32, struct anv_query_copy_params, query_data_offset),
load_param(b, 32, struct anv_query_copy_params, query_stride),
load_param(b, 32, struct anv_query_copy_params, num_items),
load_param(b, 32, struct anv_query_copy_params, flags),
shader_name == ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE ?
load_compute_index(b) : load_fragment_index(b));
return sizeof(struct anv_query_copy_params);
case ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE:
genX(libanv_memcpy)(
b,
load_param(b, 64, struct anv_memcpy_params, dst_addr),
load_param(b, 64, struct anv_memcpy_params, src_addr),
load_param(b, 32, struct anv_memcpy_params, num_dwords),
nir_imul_imm(b, load_compute_index(b), 4));
return sizeof(struct anv_memcpy_params);
default:
unreachable("Invalid shader name");
break;
}
}

View File

@@ -1799,20 +1799,18 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
}
*params = (struct anv_query_copy_params) {
.copy = {
.flags = copy_flags,
.num_queries = query_count,
.num_items = num_items,
.query_base = first_query,
.query_stride = pool->stride,
.query_data_offset = data_offset,
.destination_stride = dest_stride,
},
.query_data_addr = anv_address_physical(
.flags = copy_flags,
.num_queries = query_count,
.num_items = num_items,
.query_base = first_query,
.query_stride = pool->stride,
.query_data_offset = data_offset,
.destination_stride = dest_stride,
.query_data_addr = anv_address_physical(
(struct anv_address) {
.bo = pool->bo,
}),
.destination_addr = anv_address_physical(dest_addr),
.destination_addr = anv_address_physical(dest_addr),
};
genX(emit_simple_shader_dispatch)(&state, query_count, push_data_state);

View File

@@ -18,8 +18,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
subdir('shaders')
inc_anv = include_directories('.')
anv_flags = [
@@ -110,6 +108,7 @@ anv_per_hw_ver_files = files(
'genX_gfx_state.c',
'genX_gpu_memcpy.c',
'genX_init_state.c',
'genX_internal_kernels.c',
'genX_pipeline.c',
'genX_query.c',
'genX_simple_shader.c',
@@ -121,7 +120,7 @@ endif
foreach _gfx_ver : ['90', '110', '120', '125', '200']
libanv_per_hw_ver_libs += static_library(
'anv_per_hw_ver@0@'.format(_gfx_ver),
[anv_per_hw_ver_files, anv_entrypoints[0], anv_internal_spvs, ],
[anv_per_hw_ver_files, anv_entrypoints[0]],
include_directories : [
inc_include, inc_src, inc_intel,
],
@@ -131,7 +130,7 @@ foreach _gfx_ver : ['90', '110', '120', '125', '200']
dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml,
idep_vulkan_util_headers, idep_vulkan_wsi_headers,
idep_vulkan_runtime_headers, idep_intel_driver_ds_headers,
idep_grl,
idep_grl, idep_intel_shaders,
],
)
endforeach
@@ -207,6 +206,7 @@ anv_deps = [
idep_vulkan_util_headers,
idep_vulkan_runtime_headers,
idep_vulkan_wsi_headers,
idep_intel_shaders,
]
if with_platform_x11
@@ -234,8 +234,7 @@ libanv_common = static_library(
'anv_common',
[
libanv_files, anv_entrypoints, sha1_h,
gen_xml_pack, float64_spv_h,
anv_internal_spvs,
gen_xml_pack, intel_float64_spv_h,
],
include_directories : [
inc_include, inc_src, inc_intel,
@@ -298,7 +297,7 @@ if with_tests
dependencies : [
dep_thread, dep_dl, dep_m, anv_deps,
idep_nir, idep_vulkan_util, idep_vulkan_wsi, idep_vulkan_runtime,
idep_mesautil, idep_intel_dev,
idep_mesautil, idep_intel_dev, idep_intel_shaders,
],
c_args : anv_flags,
gnu_symbol_visibility : 'hidden',
@@ -328,6 +327,7 @@ if with_tests
idep_gtest, dep_libdrm, dep_thread, dep_m, dep_valgrind,
idep_vulkan_util, idep_vulkan_wsi_headers,
idep_vulkan_runtime, idep_intel_driver_ds, idep_intel_dev,
idep_intel_shaders,
],
include_directories : [
inc_include, inc_src, inc_intel,

View File

@@ -1,160 +0,0 @@
/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "interface.h"
/* All storage bindings will be accessed through A64 messages */
layout(set = 0, binding = 0, std430) buffer Storage0 {
uint indirect_data[];
};
layout(set = 0, binding = 1, std430) buffer Storage1 {
uint commands[];
};
layout(set = 0, binding = 2, std430) buffer Storage2 {
uint draw_ids[];
};
/* We're not using a uniform block for this because our compiler
* infrastructure relies on UBOs to be 32-bytes aligned so that we can push
* them into registers. This value can come directly from the indirect buffer
* given to indirect draw commands and the requirement there is 4-bytes
* alignment.
*
* Also use a prefix on the variable as a reminder to make a copy of it,
* avoiding unnecessary accesses.
*/
layout(set = 0, binding = 3) buffer Storage3 {
uint _draw_count;
};
/* This data will be provided through push constants. */
layout(set = 0, binding = 4) uniform block {
anv_generated_indirect_draw_params params;
};
void write_VERTEX_BUFFER_STATE(uint write_offset,
uint mocs,
uint buffer_idx,
uint64_t address,
uint size)
{
commands[write_offset + 0] = (0 << 0 | /* Buffer Pitch */
0 << 13 | /* Null Vertex Buffer */
1 << 14 | /* Address Modify Enable */
mocs << 16 | /* MOCS */
buffer_idx << 26); /* Vertex Buffer Index */
commands[write_offset + 1] = uint(address & 0xffffffff);
commands[write_offset + 2] = uint(address >> 32);
commands[write_offset + 3] = size;
}
void write_3DPRIMITIVE(uint write_offset,
bool is_predicated,
bool is_indexed,
uint vertex_count_per_instance,
uint start_vertex_location,
uint instance_count,
uint start_instance_location,
uint base_vertex_location)
{
commands[write_offset + 0] = (3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
3 << 24 | /* 3D Command Opcode */
uint(is_predicated) << 8 |
5 << 0); /* DWord Length */
commands[write_offset + 1] = uint(is_indexed) << 8;
commands[write_offset + 2] = vertex_count_per_instance;
commands[write_offset + 3] = start_vertex_location;
commands[write_offset + 4] = instance_count;
commands[write_offset + 5] = start_instance_location;
commands[write_offset + 6] = base_vertex_location;
}
void write_3DPRIMITIVE_EXTENDED(uint write_offset,
bool is_predicated,
bool is_indexed,
bool use_tbimr,
uint vertex_count_per_instance,
uint start_vertex_location,
uint instance_count,
uint start_instance_location,
uint base_vertex_location,
uint param_base_vertex,
uint param_base_instance,
uint param_draw_id)
{
commands[write_offset + 0] = (3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
3 << 24 | /* 3D Command Opcode */
uint(use_tbimr) << 13 |
1 << 11 | /* Extended Parameter Enable */
uint(is_predicated) << 8 |
8 << 0); /* DWord Length */
commands[write_offset + 1] = uint(is_indexed) << 8;
commands[write_offset + 2] = vertex_count_per_instance;
commands[write_offset + 3] = start_vertex_location;
commands[write_offset + 4] = instance_count;
commands[write_offset + 5] = start_instance_location;
commands[write_offset + 6] = base_vertex_location;
commands[write_offset + 7] = param_base_vertex;
commands[write_offset + 8] = param_base_instance;
commands[write_offset + 9] = param_draw_id;
}
void write_MI_BATCH_BUFFER_START(uint write_offset,
uint64_t addr)
{
commands[write_offset + 0] = (0 << 29 | /* Command Type */
49 << 23 | /* MI Command Opcode */
1 << 8 | /* Address Space Indicator (PPGTT) */
1 << 0); /* DWord Length */
commands[write_offset + 1] = uint(addr & 0xffffffff);
commands[write_offset + 2] = uint(addr >> 32);
}
void end_generated_draws(uint item_idx, uint cmd_idx, uint draw_id, uint draw_count)
{
uint _3dprim_dw_size = (params.flags >> 16) & 0xff;
bool indirect_count = (params.flags & ANV_GENERATED_FLAG_COUNT) != 0;
bool ring_mode = (params.flags & ANV_GENERATED_FLAG_RING_MODE) != 0;
/* We can have an indirect draw count = 0. */
uint last_draw_id = draw_count == 0 ? 0 : (min(draw_count, params.max_draw_count) - 1);
uint jump_offset = draw_count == 0 ? 0 : _3dprim_dw_size;
if (ring_mode) {
if (draw_id == last_draw_id) {
/* Exit the ring buffer to the next user commands */
write_MI_BATCH_BUFFER_START(cmd_idx + jump_offset, params.end_addr);
} else if (item_idx == (params.ring_count - 1)) {
/* Jump back to the generation shader to generate more draws */
write_MI_BATCH_BUFFER_START(cmd_idx + jump_offset, params.gen_addr);
}
} else {
if (draw_id == last_draw_id && draw_count < params.max_draw_count) {
/* Skip forward to the end of the generated draws */
write_MI_BATCH_BUFFER_START(cmd_idx + jump_offset, params.end_addr);
}
}
}

View File

@@ -1,105 +0,0 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "interface.h"
/* These 3 bindings will be accessed through A64 messages */
layout(set = 0, binding = 0, std430) buffer Storage0 {
uint query_data[];
};
layout(set = 0, binding = 1, std430) buffer Storage1 {
uint destination[];
};
/* This data will be provided through push constants. */
layout(set = 0, binding = 2) uniform block {
anv_query_copy_shader_params params;
};
void query_copy(uint item_idx)
{
if (item_idx >= params.num_queries)
return;
bool is_result64 = (params.flags & ANV_COPY_QUERY_FLAG_RESULT64) != 0;
bool write_available = (params.flags & ANV_COPY_QUERY_FLAG_AVAILABLE) != 0;
bool compute_delta = (params.flags & ANV_COPY_QUERY_FLAG_DELTA) != 0;
bool partial_result = (params.flags & ANV_COPY_QUERY_FLAG_PARTIAL) != 0;
uint query_byte = (params.query_base + item_idx) * params.query_stride;
uint query_data_byte = query_byte + params.query_data_offset;
uint destination_byte = item_idx * params.destination_stride;
uint64_t availability = query_data[query_byte / 4];
uint query_data_dword = query_data_byte / 4;
uint dest_dword = destination_byte / 4;
for (uint i = 0; i < params.num_items; i++) {
uint item_data_dword = query_data_dword + i * 2 * (compute_delta ? 2 : 1);
uint64_t v;
if (compute_delta) {
uint64_t v0 = uint64_t(query_data[item_data_dword + 0]) |
(uint64_t(query_data[item_data_dword + 1]) << 32);
uint64_t v1 = uint64_t(query_data[item_data_dword + 2]) |
(uint64_t(query_data[item_data_dword + 3]) << 32);
v = v1 - v0;
} else {
v = uint64_t(query_data[item_data_dword + 0]) |
(uint64_t(query_data[item_data_dword + 1]) << 32);
}
/* vkCmdCopyQueryPoolResults:
*
* "If VK_QUERY_RESULT_PARTIAL_BIT is set, then for any query that is
* unavailable, an intermediate result between zero and the final
* result value is written for that query."
*
* We write 0 for values that have not been written yet; we can't really
* provide any sensible value.
*/
if (partial_result && availability == 0)
v = 0;
if (is_result64) {
destination[dest_dword + 0] = uint(v & 0xffffffff);
destination[dest_dword + 1] = uint(v >> 32);
dest_dword += 2;
} else {
destination[dest_dword + 0] = uint(v & 0xffffffff);
dest_dword += 1;
}
}
if (write_available) {
if (is_result64) {
destination[dest_dword + 0] = uint(availability & 0xffffffff);
destination[dest_dword + 1] = uint(availability >> 32);
} else {
destination[dest_dword + 0] = uint(availability & 0xffffffff);
}
}
}

View File

@@ -1,201 +0,0 @@
/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_GOOGLE_include_directive : enable
#include "common_generated_draws.glsl"
void gfx11_write_draw(uint item_idx, uint cmd_idx, uint draw_id)
{
bool is_indexed = (params.flags & ANV_GENERATED_FLAG_INDEXED) != 0;
bool is_predicated = (params.flags & ANV_GENERATED_FLAG_PREDICATED) != 0;
bool use_tbimr = (params.flags & ANV_GENERATED_FLAG_TBIMR) != 0;
uint indirect_data_offset = draw_id * params.indirect_data_stride / 4;
if (is_indexed) {
/* Loading a VkDrawIndexedIndirectCommand */
uint index_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * params.instance_multiplier;
uint first_index = indirect_data[indirect_data_offset + 2];
uint vertex_offset = indirect_data[indirect_data_offset + 3];
uint first_instance = indirect_data[indirect_data_offset + 4];
write_3DPRIMITIVE_EXTENDED(cmd_idx,
is_predicated,
is_indexed,
use_tbimr,
index_count,
first_index,
instance_count,
first_instance,
vertex_offset,
vertex_offset,
first_instance,
draw_id);
} else {
/* Loading a VkDrawIndirectCommand structure */
uint vertex_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * params.instance_multiplier;
uint first_vertex = indirect_data[indirect_data_offset + 2];
uint first_instance = indirect_data[indirect_data_offset + 3];
write_3DPRIMITIVE_EXTENDED(cmd_idx,
is_predicated,
is_indexed,
use_tbimr,
vertex_count,
first_vertex,
instance_count,
first_instance,
0 /* base_vertex_location */,
first_vertex,
first_instance,
draw_id);
}
}
void gfx9_write_draw(uint item_idx, uint cmd_idx, uint draw_id)
{
bool is_indexed = (params.flags & ANV_GENERATED_FLAG_INDEXED) != 0;
bool is_predicated = (params.flags & ANV_GENERATED_FLAG_PREDICATED) != 0;
bool uses_base = (params.flags & ANV_GENERATED_FLAG_BASE) != 0;
bool uses_drawid = (params.flags & ANV_GENERATED_FLAG_DRAWID) != 0;
uint mocs = (params.flags >> 8) & 0xff;
uint indirect_data_offset = draw_id * params.indirect_data_stride / 4;
if (is_indexed) {
/* Loading a VkDrawIndexedIndirectCommand */
uint index_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * params.instance_multiplier;
uint first_index = indirect_data[indirect_data_offset + 2];
uint vertex_offset = indirect_data[indirect_data_offset + 3];
uint first_instance = indirect_data[indirect_data_offset + 4];
if (uses_base || uses_drawid) {
uint state_vertex_len =
1 + (uses_base ? 4 : 0) + (uses_drawid ? 4 : 0);
commands[cmd_idx] =
(3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
0 << 24 | /* 3D Command Opcode */
8 << 16 | /* 3D Command Sub Opcode */
(state_vertex_len - 2) << 0); /* DWord Length */
cmd_idx += 1;
if (uses_base) {
uint64_t indirect_draw_data_addr =
params.indirect_data_addr + item_idx * params.indirect_data_stride + 12;
write_VERTEX_BUFFER_STATE(cmd_idx,
mocs,
31,
indirect_draw_data_addr,
8);
cmd_idx += 4;
}
if (uses_drawid) {
uint64_t draw_idx_addr = params.draw_id_addr + 4 * item_idx;
draw_ids[item_idx] = draw_id;
write_VERTEX_BUFFER_STATE(cmd_idx,
mocs,
32,
draw_idx_addr,
4);
cmd_idx += 4;
}
}
write_3DPRIMITIVE(cmd_idx,
is_predicated,
is_indexed,
index_count,
first_index,
instance_count,
first_instance,
vertex_offset);
} else {
/* Loading a VkDrawIndirectCommand structure */
uint vertex_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * params.instance_multiplier;
uint first_vertex = indirect_data[indirect_data_offset + 2];
uint first_instance = indirect_data[indirect_data_offset + 3];
if (uses_base || uses_drawid) {
uint state_vertex_len =
1 + (uses_base ? 4 : 0) + (uses_drawid ? 4 : 0);
commands[cmd_idx] =
(3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
0 << 24 | /* 3D Command Opcode */
8 << 16 | /* 3D Command Sub Opcode */
(state_vertex_len - 2) << 0); /* DWord Length */
cmd_idx += 1;
if (uses_base) {
uint64_t indirect_draw_data_addr =
params.indirect_data_addr + item_idx * params.indirect_data_stride + 8;
write_VERTEX_BUFFER_STATE(cmd_idx,
mocs,
31,
indirect_draw_data_addr,
8);
cmd_idx += 4;
}
if (uses_drawid) {
uint64_t draw_idx_addr = params.draw_id_addr + 4 * item_idx;
draw_ids[item_idx] = draw_id;
write_VERTEX_BUFFER_STATE(cmd_idx,
mocs,
32,
draw_idx_addr,
4);
cmd_idx += 4;
}
}
write_3DPRIMITIVE(cmd_idx,
is_predicated,
is_indexed,
vertex_count,
first_vertex,
instance_count,
first_instance,
0 /* base_vertex_location */);
}
}
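
Both branches above hand-pack the 3DSTATE_VERTEX_BUFFERS header dword. A
minimal C sketch of the same encoding, naming the shifts per the inline
comments (the helper name is hypothetical):

#include <stdint.h>

static uint32_t
pack_3dstate_vertex_buffers_header(uint32_t state_vertex_len)
{
   return (3u << 29) |                    /* Command Type */
          (3u << 27) |                    /* Command SubType */
          (0u << 24) |                    /* 3D Command Opcode */
          (8u << 16) |                    /* 3D Command Sub Opcode */
          ((state_vertex_len - 2) << 0);  /* DWord Length */
}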
void main()
{
uint _3dprim_dw_size = (params.flags >> 16) & 0xff;
uint gfx_ver = (params.flags >> 24) & 0xff;
uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
uint cmd_idx = item_idx * _3dprim_dw_size;
uint draw_id = params.draw_base + item_idx;
uint draw_count = _draw_count;
if (draw_id < min(draw_count, params.max_draw_count)) {
if (gfx_ver == 9)
gfx9_write_draw(item_idx, cmd_idx, draw_id);
else
gfx11_write_draw(item_idx, cmd_idx, draw_id);
}
end_generated_draws(item_idx, cmd_idx, draw_id, draw_count);
}
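
The item_idx computation above implies the generation pass rasterizes an
area at most 8192 pixels wide, one fragment per draw. A sizing sketch under
that assumption (the helper name is hypothetical):

#include <stdint.h>

#define GEN_DRAWS_WIDTH 8192u

static void
generation_area(uint32_t max_draw_count, uint32_t *width, uint32_t *height)
{
   *width  = max_draw_count < GEN_DRAWS_WIDTH ? max_draw_count
                                              : GEN_DRAWS_WIDTH;
   *height = (max_draw_count + GEN_DRAWS_WIDTH - 1) / GEN_DRAWS_WIDTH;
}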

View File

@@ -1,121 +0,0 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef _SHADERS_INTERFACE_H_
#define _SHADERS_INTERFACE_H_
#ifdef _IN_SHADER_
#define BITFIELD_BIT(i) (1u << i)
#define uint32_t uint
#define PACKED
#else
#include "util/macros.h"
#include <stdint.h>
#endif
#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
/* Only used on Gfx9; means the pipeline is using gl_DrawID */
#define ANV_GENERATED_FLAG_DRAWID BITFIELD_BIT(2)
/* Only used on Gfx9; means the pipeline is using gl_BaseVertex or
* gl_BaseInstance
*/
#define ANV_GENERATED_FLAG_BASE BITFIELD_BIT(3)
/* Whether the count is indirect */
#define ANV_GENERATED_FLAG_COUNT BITFIELD_BIT(4)
/* Whether the generation shader writes to the ring buffer */
#define ANV_GENERATED_FLAG_RING_MODE BITFIELD_BIT(5)
/* Whether TBIMR tile-based rendering shall be enabled. */
#define ANV_GENERATED_FLAG_TBIMR BITFIELD_BIT(6)
struct PACKED anv_generated_indirect_draw_params {
/* Draw ID buffer address (only used on Gfx9) */
uint64_t draw_id_addr;
/* Indirect data buffer address (only used on Gfx9) */
uint64_t indirect_data_addr;
/* Stride between each element of the indirect data buffer */
uint32_t indirect_data_stride;
uint32_t flags; /* 0-7: bits, 8-15: mocs, 16-23: cmd_dws, 24-31: gfx_ver */
/* Base draw ID; it is added to the index computed from gl_FragCoord */
uint32_t draw_base;
/* Maximum number of draws (equal to draw_count for indirect draws without
* an indirect count)
*/
uint32_t max_draw_count;
/* Number of draws to generate in the ring buffer (only useful in ring
* buffer mode)
*/
uint32_t ring_count;
/* Instance multiplier for multiview */
uint32_t instance_multiplier;
/* Address to jump to in order to generate further draws (used in ring
* mode)
*/
uint64_t gen_addr;
/* Address to jump to after the generated draws (only used with the
* indirect draw count variants)
*/
uint64_t end_addr;
};
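
The shader side confirms the flags packing documented above: gfx9_write_draw()
reads the MOCS as (flags >> 8) & 0xff, and main() reads the 3DPRIMITIVE dword
size and the graphics version from bits 16-23 and 24-31. A hypothetical
host-side helper assembling the word:

#include <stdint.h>

static uint32_t
pack_generated_draw_flags(uint32_t flag_bits, uint32_t mocs,
                          uint32_t cmd_dws, uint32_t gfx_ver)
{
   return (flag_bits & 0xff) |
          ((mocs     & 0xff) << 8) |
          ((cmd_dws  & 0xff) << 16) |
          ((gfx_ver  & 0xff) << 24);
}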
#define ANV_COPY_QUERY_FLAG_RESULT64 BITFIELD_BIT(0)
#define ANV_COPY_QUERY_FLAG_AVAILABLE BITFIELD_BIT(1)
#define ANV_COPY_QUERY_FLAG_DELTA BITFIELD_BIT(2)
#define ANV_COPY_QUERY_FLAG_PARTIAL BITFIELD_BIT(3)
/* This needs to match common_query_copy.glsl:
*
* layout(set = 0, binding = 2) uniform block
*/
struct PACKED anv_query_copy_shader_params {
/* ANV_COPY_QUERY_FLAG_* flags */
uint32_t flags;
/* Number of queries to copy */
uint32_t num_queries;
/* Number of result items to write back per query */
uint32_t num_items;
/* First query to copy result from */
uint32_t query_base;
/* Query stride in bytes */
uint32_t query_stride;
/* Offset from which the query data should be read */
uint32_t query_data_offset;
/* Stride of destination writes */
uint32_t destination_stride;
/* The structure size needs to be 64-bit aligned, otherwise 32-bit builds
* get very unhappy.
*/
uint32_t padding;
};
#endif /* _SHADERS_INTERFACE_H_ */
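
A minimal usage sketch for the query-copy parameters, assuming a 64-bit
result copy with availability (all values are illustrative, not taken from
the driver):

struct anv_query_copy_shader_params params = {
   .flags              = ANV_COPY_QUERY_FLAG_RESULT64 |
                         ANV_COPY_QUERY_FLAG_AVAILABLE,
   .num_queries        = 4,
   .num_items          = 1,
   .query_base         = 0,
   .query_stride       = 16, /* 8 bytes of availability + 8 of data */
   .query_data_offset  = 8,  /* skip the availability value */
   .destination_stride = 16, /* 8 bytes of result + 8 of availability */
};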

View File

@@ -1,64 +0,0 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_GOOGLE_include_directive : enable
layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;
/* These 2 bindings will be accessed through A64 messages */
layout(set = 0, binding = 0, std430) buffer Storage0 {
uint src[];
};
layout(set = 0, binding = 1, std430) buffer Storage1 {
uint dst[];
};
/* This data will be provided through push constants. */
layout(set = 0, binding = 2) uniform block {
uint num_dwords;
uint pad;
};
void main()
{
uint idx = gl_GlobalInvocationID.x * 4;
/* Try to do the copies in a single message as much as possible. */
if (idx + 4 <= num_dwords) {
dst[idx + 0] = src[idx + 0];
dst[idx + 1] = src[idx + 1];
dst[idx + 2] = src[idx + 2];
dst[idx + 3] = src[idx + 3];
} else if (idx + 3 <= num_dwords) {
dst[idx + 0] = src[idx + 0];
dst[idx + 1] = src[idx + 1];
dst[idx + 2] = src[idx + 2];
} else if (idx + 2 <= num_dwords) {
dst[idx + 0] = src[idx + 0];
dst[idx + 1] = src[idx + 1];
} else if (idx + 1 <= num_dwords) {
dst[idx + 0] = src[idx + 0];
}
}
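
Each invocation copies up to 4 dwords and the workgroup is 16 invocations
wide, so one workgroup covers 64 dwords. A hypothetical dispatch-size
computation:

#include <stdint.h>

static uint32_t
memcpy_group_count(uint32_t num_dwords)
{
   /* 16 invocations x 4 dwords each, rounded up. */
   return (num_dwords + 63) / 64;
}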

View File

@@ -1,63 +0,0 @@
# Copyright © 2022 Intel Corporation
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
float64_spv_h = custom_target(
'float64_spv.h',
input : [glsl2spirv, float64_glsl_file],
output : 'float64_spv.h',
command : [
prog_python, '@INPUT@', '@OUTPUT@',
prog_glslang,
'--create-entry', 'main',
'--vn', 'float64_spv_source',
'--glsl-version', '450',
'-Olib',
]
)
anv_internal_shaders = [
[ 'generated_draws.glsl', 'frag' ],
[ 'query_copy_compute.glsl', 'comp' ],
[ 'query_copy_fragment.glsl', 'frag' ],
[ 'memcpy_compute.glsl', 'comp' ],
]
anv_internal_spvs = []
foreach item : anv_internal_shaders
f = item[0]
stage = item[1]
spv_filename = f.replace('.glsl', '_spv.h')
src_name = f.replace('.glsl', '_spv_source')
anv_internal_spvs += custom_target(
spv_filename,
input : [glsl2spirv, f, files('common_generated_draws.glsl',
'common_query_copy.glsl',
'interface.h')],
output : spv_filename,
command : [
prog_python, '@INPUT0@', '@INPUT1@', '@OUTPUT@',
prog_glslang,
'--vn', src_name,
'--glsl-version', '450',
'--stage', stage,
'-I' + meson.current_source_dir(),
'-D_IN_SHADER_=1',
])
endforeach

View File

@@ -1,35 +0,0 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_GOOGLE_include_directive : enable
#include "common_query_copy.glsl"
layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;
void main()
{
query_copy(gl_GlobalInvocationID.x);
}
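
With local_size_x = 16 and the early-out in query_copy(), the dispatch needs
ceil(num_queries / 16) workgroups. A sketch (the helper name is assumed):

#include <stdint.h>

static uint32_t
query_copy_group_count(uint32_t num_queries)
{
   return (num_queries + 15) / 16; /* one invocation per query */
}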

View File

@@ -1,33 +0,0 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_GOOGLE_include_directive : enable
#include "common_query_copy.glsl"
void main()
{
query_copy(uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x));
}