anv: add shaders for copying query results
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Ivan Briano <ivan.briano@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23074>
This commit is contained in:
@@ -35,6 +35,8 @@
|
||||
|
||||
#include "shaders/gfx9_generated_draws_spv.h"
|
||||
#include "shaders/gfx11_generated_draws_spv.h"
|
||||
#include "shaders/query_copy_compute_spv.h"
|
||||
#include "shaders/query_copy_fragment_spv.h"
|
||||
|
||||
static bool
|
||||
lower_vulkan_descriptors_instr(nir_builder *b, nir_instr *instr, void *cb_data)
|
||||
@@ -107,6 +109,47 @@ lower_vulkan_descriptors(nir_shader *shader,
|
||||
(void *)bind_map);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_base_workgroup_id(nir_builder *b, nir_instr *instr, UNUSED void *data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intrin->intrinsic != nir_intrinsic_load_base_workgroup_id)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_imm_zero(b, 3, 32));
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_load_ubo_to_uniforms(nir_builder *b, nir_instr *instr, void *cb_data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic != nir_intrinsic_load_ubo)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(instr);
|
||||
|
||||
nir_ssa_def_rewrite_uses(
|
||||
&intrin->dest.ssa,
|
||||
nir_load_uniform(b,
|
||||
intrin->dest.ssa.num_components,
|
||||
intrin->dest.ssa.bit_size,
|
||||
intrin->src[1].ssa,
|
||||
.base = 0,
|
||||
.range = intrin->dest.ssa.num_components *
|
||||
intrin->dest.ssa.bit_size / 8));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct anv_shader_bin *
|
||||
compile_upload_spirv(struct anv_device *device,
|
||||
gl_shader_stage stage,
|
||||
@@ -160,6 +203,15 @@ compile_upload_spirv(struct anv_device *device,
|
||||
.use_fragcoord_sysval = true,
|
||||
.use_layer_id_sysval = true,
|
||||
});
|
||||
} else {
|
||||
nir_lower_compute_system_values_options options = {
|
||||
.has_base_workgroup_id = true,
|
||||
.lower_cs_local_id_to_index = true,
|
||||
.lower_workgroup_id_to_index = true,
|
||||
};
|
||||
NIR_PASS_V(nir, nir_lower_compute_system_values, &options);
|
||||
NIR_PASS_V(nir, nir_shader_instructions_pass, lower_base_workgroup_id,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
}
|
||||
|
||||
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||
@@ -185,6 +237,15 @@ compile_upload_spirv(struct anv_device *device,
|
||||
NIR_PASS_V(nir, nir_opt_constant_folding);
|
||||
NIR_PASS_V(nir, nir_opt_dce);
|
||||
|
||||
if (stage == MESA_SHADER_COMPUTE) {
|
||||
NIR_PASS_V(nir, nir_shader_instructions_pass,
|
||||
lower_load_ubo_to_uniforms,
|
||||
nir_metadata_block_index | nir_metadata_dominance,
|
||||
NULL);
|
||||
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
|
||||
nir->num_uniforms = bind_map->push_data_size;
|
||||
}
|
||||
|
||||
union brw_any_prog_key key;
|
||||
memset(&key, 0, sizeof(key));
|
||||
|
||||
@@ -320,6 +381,61 @@ anv_device_init_internal_kernels(struct anv_device *device)
|
||||
.push_constant = true,
|
||||
},
|
||||
},
|
||||
.push_data_size = sizeof(struct anv_generated_indirect_params),
|
||||
},
|
||||
},
|
||||
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE] = {
|
||||
.key = {
|
||||
.name = "anv-copy-query-compute",
|
||||
},
|
||||
.stage = MESA_SHADER_COMPUTE,
|
||||
.spirv_data = query_copy_compute_spv_source,
|
||||
.spirv_size = ARRAY_SIZE(query_copy_compute_spv_source),
|
||||
.send_count = device->info->verx10 >= 125 ?
|
||||
9 /* 4 loads + 4 stores + 1 EOT */ :
|
||||
8 /* 3 loads + 4 stores + 1 EOT */,
|
||||
.bind_map = {
|
||||
.num_bindings = 3,
|
||||
.bindings = {
|
||||
{
|
||||
.address_offset = offsetof(struct anv_query_copy_params,
|
||||
query_data_addr),
|
||||
},
|
||||
{
|
||||
.address_offset = offsetof(struct anv_query_copy_params,
|
||||
destination_addr),
|
||||
},
|
||||
{
|
||||
.push_constant = true,
|
||||
},
|
||||
},
|
||||
.push_data_size = sizeof(struct anv_query_copy_params),
|
||||
},
|
||||
},
|
||||
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT] = {
|
||||
.key = {
|
||||
.name = "anv-copy-query-fragment",
|
||||
},
|
||||
.stage = MESA_SHADER_FRAGMENT,
|
||||
.spirv_data = query_copy_fragment_spv_source,
|
||||
.spirv_size = ARRAY_SIZE(query_copy_fragment_spv_source),
|
||||
.send_count = 8 /* 3 loads + 4 stores + 1 EOT */,
|
||||
.bind_map = {
|
||||
.num_bindings = 3,
|
||||
.bindings = {
|
||||
{
|
||||
.address_offset = offsetof(struct anv_query_copy_params,
|
||||
query_data_addr),
|
||||
},
|
||||
{
|
||||
.address_offset = offsetof(struct anv_query_copy_params,
|
||||
destination_addr),
|
||||
},
|
||||
{
|
||||
.push_constant = true,
|
||||
},
|
||||
},
|
||||
.push_data_size = sizeof(struct anv_query_copy_params),
|
||||
},
|
||||
},
|
||||
};
|
||||
|
@@ -33,7 +33,7 @@
|
||||
|
||||
/* This needs to match common_generated_draws.glsl :
|
||||
*
|
||||
* layout(set = 0, binding = 2) uniform block
|
||||
* layout(set = 0, binding = 3) uniform block
|
||||
*/
|
||||
struct anv_generated_indirect_draw_params {
|
||||
/* Draw ID buffer address (only used on Gfx9) */
|
||||
@@ -84,4 +84,44 @@ struct anv_generated_indirect_params {
|
||||
struct anv_generated_indirect_params *prev;
|
||||
};
|
||||
|
||||
#define ANV_COPY_QUERY_FLAG_RESULT64 BITFIELD_BIT(0)
|
||||
#define ANV_COPY_QUERY_FLAG_AVAILABLE BITFIELD_BIT(1)
|
||||
#define ANV_COPY_QUERY_FLAG_DELTA BITFIELD_BIT(2)
|
||||
#define ANV_COPY_QUERY_FLAG_PARTIAL BITFIELD_BIT(3)
|
||||
|
||||
/* This needs to match common_query_copy.glsl :
|
||||
*
|
||||
* layout(set = 0, binding = 2) uniform block
|
||||
*/
|
||||
/* Parameters read by the query copy shaders.
 *
 * Every field is a fixed-width 32-bit unsigned integer, declared in the same
 * order as the members of the uniform block in common_query_copy.glsl, so
 * that the C layout and the shader layout agree.
 */
struct anv_query_copy_shader_params {
   /* ANV_COPY_QUERY_FLAG_* flags */
   uint32_t flags;

   /* Number of queries to copy */
   uint32_t num_queries;

   /* Number of items to write back in the results per query */
   uint32_t num_items;

   /* First query to copy result from */
   uint32_t query_base;

   /* Query stride in bytes */
   uint32_t query_stride;

   /* Offset in bytes, from the start of a query, at which the data should
    * be read from
    */
   uint32_t query_data_offset;

   /* Stride of destination writes in bytes */
   uint32_t destination_stride;
};
|
||||
|
||||
/* Complete parameter payload for the query copy kernels. The size of this
 * struct is what the kernel table uses as push_data_size for both the
 * compute and the fragment variant.
 */
struct anv_query_copy_params {
|
||||
/* Parameters consumed by the shader itself (see
 * struct anv_query_copy_shader_params).
 */
struct anv_query_copy_shader_params copy;
|
||||
|
||||
/* 64-bit address of the query data; the kernel bind maps use the offset of
 * this field as address_offset for binding 0 (query_data[] in the shader).
 */
uint64_t query_data_addr;
|
||||
|
||||
/* 64-bit address of the destination; the kernel bind maps use the offset of
 * this field as address_offset for binding 1 (destination[] in the shader).
 */
uint64_t destination_addr;
|
||||
};
|
||||
|
||||
#endif /* ANV_GENERATED_INDIRECT_DRAWS_H */
|
||||
|
@@ -1115,6 +1115,8 @@ anv_load_fp64_shader(struct anv_device *device);
|
||||
|
||||
enum anv_internal_kernel_name {
|
||||
ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
|
||||
ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE,
|
||||
ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT,
|
||||
|
||||
ANV_INTERNAL_KERNEL_COUNT,
|
||||
};
|
||||
@@ -1130,6 +1132,7 @@ struct anv_internal_kernel_bind_map {
|
||||
*/
|
||||
uint32_t address_offset;
|
||||
} bindings[5];
|
||||
uint32_t push_data_size;
|
||||
};
|
||||
|
||||
enum anv_rt_bvh_build_method {
|
||||
|
@@ -116,7 +116,7 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']],
|
||||
_gfx_ver = g[0]
|
||||
libanv_per_hw_ver_libs += static_library(
|
||||
'anv_per_hw_ver@0@'.format(_gfx_ver),
|
||||
[anv_per_hw_ver_files, g[1], anv_entrypoints[0], generated_draws_spvs, ],
|
||||
[anv_per_hw_ver_files, g[1], anv_entrypoints[0], anv_internal_spvs, ],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
|
||||
],
|
||||
@@ -220,7 +220,7 @@ libanv_common = static_library(
|
||||
[
|
||||
libanv_files, anv_entrypoints, sha1_h,
|
||||
gen_xml_pack, float64_spv_h,
|
||||
generated_draws_spvs,
|
||||
anv_internal_spvs,
|
||||
],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
|
||||
|
116
src/intel/vulkan/shaders/common_query_copy.glsl
Normal file
116
src/intel/vulkan/shaders/common_query_copy.glsl
Normal file
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
* Copyright © 2023 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* Mask with only bit i set. The argument is parenthesized so that compound
 * expressions (e.g. a | b) expand with the intended precedence.
 */
#define BITFIELD_BIT(i) (1u << (i))
|
||||
|
||||
#define ANV_COPY_QUERY_FLAG_RESULT64 BITFIELD_BIT(0)
|
||||
#define ANV_COPY_QUERY_FLAG_AVAILABLE BITFIELD_BIT(1)
|
||||
#define ANV_COPY_QUERY_FLAG_DELTA BITFIELD_BIT(2)
|
||||
#define ANV_COPY_QUERY_FLAG_PARTIAL BITFIELD_BIT(3)
|
||||
|
||||
/* These 3 bindings will be accessed through A64 messages */
|
||||
layout(set = 0, binding = 0, std430) buffer Storage0 {
|
||||
uint query_data[];
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 1, std430) buffer Storage1 {
|
||||
uint destination[];
|
||||
};
|
||||
|
||||
/* This data will be provided through push constants. */
|
||||
layout(set = 0, binding = 2) uniform block {
|
||||
uint flags;
|
||||
uint num_queries;
|
||||
uint num_items;
|
||||
uint query_base;
|
||||
uint query_stride;
|
||||
uint query_data_offset;
|
||||
uint destination_stride;
|
||||
};
|
||||
|
||||
/* Copies the results of one query from query_data[] into destination[].
 *
 * item_idx selects the query relative to query_base; calls with
 * item_idx >= num_queries return without touching memory.
 *
 * The dword at the start of the query (query_stride bytes per query) is read
 * as the availability value. num_items 64-bit values are then read starting
 * query_data_offset bytes into the query and written out consecutively at
 * item_idx * destination_stride bytes into destination[]:
 *
 *  - ANV_COPY_QUERY_FLAG_RESULT64: values are written as 2 dwords, otherwise
 *    only the low dword is written.
 *  - ANV_COPY_QUERY_FLAG_DELTA: each item is a pair of 64-bit values and the
 *    second minus the first is written.
 *  - ANV_COPY_QUERY_FLAG_PARTIAL: 0 is written when availability is 0.
 *  - ANV_COPY_QUERY_FLAG_AVAILABLE: the availability value is written after
 *    the last item.
 *
 * NOTE(review): the kernel table on the C side records an expected number of
 * send messages for these shaders, so the shape of the loads and stores below
 * presumably matters — confirm before restructuring memory accesses.
 */
void query_copy(uint item_idx)
{
   if (item_idx >= num_queries)
      return;

   bool result_is_64bit = (flags & ANV_COPY_QUERY_FLAG_RESULT64) != 0;
   bool with_availability = (flags & ANV_COPY_QUERY_FLAG_AVAILABLE) != 0;
   bool is_delta = (flags & ANV_COPY_QUERY_FLAG_DELTA) != 0;
   bool allow_partial = (flags & ANV_COPY_QUERY_FLAG_PARTIAL) != 0;

   /* Byte position of this query; a single dword is read there and used as
    * the availability value.
    */
   uint slot_byte = (query_base + item_idx) * query_stride;
   uint64_t availability = query_data[slot_byte / 4];

   /* Dword indices of the first value to read and the first dword to write. */
   uint src_dword = (slot_byte + query_data_offset) / 4;
   uint dst_dword = (item_idx * destination_stride) / 4;

   /* An item is one 64-bit value (2 dwords), or two of them when a delta has
    * to be computed (4 dwords).
    */
   uint dwords_per_item = is_delta ? 4u : 2u;

   for (uint item = 0; item < num_items; item++) {
      uint item_dword = src_dword + item * dwords_per_item;

      uint64_t value;
      if (is_delta) {
         uint64_t first = uint64_t(query_data[item_dword + 0]) |
                          (uint64_t(query_data[item_dword + 1]) << 32);
         uint64_t second = uint64_t(query_data[item_dword + 2]) |
                           (uint64_t(query_data[item_dword + 3]) << 32);

         value = second - first;
      } else {
         value = uint64_t(query_data[item_dword + 0]) |
                 (uint64_t(query_data[item_dword + 1]) << 32);
      }

      /* vkCmdCopyQueryPoolResults:
       *
       *    "If VK_QUERY_RESULT_PARTIAL_BIT is set, then for any query that is
       *     unavailable, an intermediate result between zero and the final
       *     result value is written for that query."
       *
       * The values have not been written yet, so there is no sensible
       * intermediate result to provide: write 0.
       */
      if (allow_partial && availability == 0)
         value = 0;

      if (result_is_64bit) {
         destination[dst_dword + 0] = uint(value & 0xffffffff);
         destination[dst_dword + 1] = uint(value >> 32);
         dst_dword += 2;
      } else {
         destination[dst_dword + 0] = uint(value & 0xffffffff);
         dst_dword += 1;
      }
   }

   if (!with_availability)
      return;

   /* The availability value goes right after the last item written. */
   if (result_is_64bit) {
      destination[dst_dword + 0] = uint(availability & 0xffffffff);
      destination[dst_dword + 1] = uint(availability >> 32);
   } else {
      destination[dst_dword + 0] = uint(availability & 0xffffffff);
   }
}
|
@@ -32,25 +32,30 @@ float64_spv_h = custom_target(
|
||||
]
|
||||
)
|
||||
|
||||
generated_draws_shaders = [
|
||||
'gfx9_generated_draws.glsl',
|
||||
'gfx11_generated_draws.glsl',
|
||||
anv_internal_shaders = [
|
||||
[ 'gfx9_generated_draws.glsl', 'frag' ],
|
||||
[ 'gfx11_generated_draws.glsl', 'frag' ],
|
||||
[ 'query_copy_compute.glsl', 'comp' ],
|
||||
[ 'query_copy_fragment.glsl', 'frag' ],
|
||||
]
|
||||
|
||||
generated_draws_spvs = []
|
||||
foreach f : generated_draws_shaders
|
||||
anv_internal_spvs = []
|
||||
foreach item : anv_internal_shaders
|
||||
f = item[0]
|
||||
stage = item[1]
|
||||
spv_filename = f.replace('.glsl', '_spv.h')
|
||||
src_name = f.replace('.glsl', '_spv_source')
|
||||
generated_draws_spvs += custom_target(
|
||||
anv_internal_spvs += custom_target(
|
||||
spv_filename,
|
||||
input : [glsl2spirv, f, files('common_generated_draws.glsl')],
|
||||
input : [glsl2spirv, f, files('common_generated_draws.glsl',
|
||||
'common_query_copy.glsl')],
|
||||
output : spv_filename,
|
||||
command : [
|
||||
prog_python, '@INPUT0@', '@INPUT1@', '@OUTPUT@',
|
||||
prog_glslang,
|
||||
'--vn', src_name,
|
||||
'--glsl-version', '450',
|
||||
'--stage', 'frag',
|
||||
'--stage', stage,
|
||||
'-I' + meson.current_source_dir(),
|
||||
])
|
||||
endforeach
|
||||
|
35
src/intel/vulkan/shaders/query_copy_compute.glsl
Normal file
35
src/intel/vulkan/shaders/query_copy_compute.glsl
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright © 2023 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#version 450
|
||||
#extension GL_ARB_gpu_shader_int64 : enable
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "common_query_copy.glsl"
|
||||
|
||||
layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
/* Compute entry point: the X component of the global invocation ID is the
 * index of the query handled by this invocation.
 */
void main()
{
   uint query_idx = gl_GlobalInvocationID.x;

   query_copy(query_idx);
}
|
33
src/intel/vulkan/shaders/query_copy_fragment.glsl
Normal file
33
src/intel/vulkan/shaders/query_copy_fragment.glsl
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright © 2023 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#version 450
|
||||
#extension GL_ARB_gpu_shader_int64 : enable
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "common_query_copy.glsl"
|
||||
|
||||
/* Fragment entry point: the fragment position is linearized into a query
 * index using rows of 8192 fragments.
 *
 * NOTE(review): presumably the host side draws a rectangle at most 8192
 * pixels wide to match — confirm against the code emitting the draw.
 */
void main()
{
   uint x = uint(gl_FragCoord.x);
   uint y = uint(gl_FragCoord.y);

   query_copy(y * 8192 + x);
}
|
Reference in New Issue
Block a user