anv: add shaders for copying query results

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23074>
Lionel Landwerlin
2023-05-16 21:59:33 +03:00
parent 4cee8ce7a5
commit 930e862af7
8 changed files with 359 additions and 11 deletions


@@ -35,6 +35,8 @@
#include "shaders/gfx9_generated_draws_spv.h"
#include "shaders/gfx11_generated_draws_spv.h"
#include "shaders/query_copy_compute_spv.h"
#include "shaders/query_copy_fragment_spv.h"
static bool
lower_vulkan_descriptors_instr(nir_builder *b, nir_instr *instr, void *cb_data)
@@ -107,6 +109,47 @@ lower_vulkan_descriptors(nir_shader *shader,
(void *)bind_map);
}
static bool
lower_base_workgroup_id(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_load_base_workgroup_id)
return false;
b->cursor = nir_instr_remove(&intrin->instr);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_imm_zero(b, 3, 32));
return true;
}
static bool
lower_load_ubo_to_uniforms(nir_builder *b, nir_instr *instr, void *cb_data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_load_ubo)
return false;
b->cursor = nir_instr_remove(instr);
nir_ssa_def_rewrite_uses(
&intrin->dest.ssa,
nir_load_uniform(b,
intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size,
intrin->src[1].ssa,
.base = 0,
.range = intrin->dest.ssa.num_components *
intrin->dest.ssa.bit_size / 8));
return true;
}
static struct anv_shader_bin *
compile_upload_spirv(struct anv_device *device,
gl_shader_stage stage,
@@ -160,6 +203,15 @@ compile_upload_spirv(struct anv_device *device,
.use_fragcoord_sysval = true,
.use_layer_id_sysval = true,
});
} else {
nir_lower_compute_system_values_options options = {
.has_base_workgroup_id = true,
.lower_cs_local_id_to_index = true,
.lower_workgroup_id_to_index = true,
};
NIR_PASS_V(nir, nir_lower_compute_system_values, &options);
NIR_PASS_V(nir, nir_shader_instructions_pass, lower_base_workgroup_id,
nir_metadata_block_index | nir_metadata_dominance, NULL);
}
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
@@ -185,6 +237,15 @@ compile_upload_spirv(struct anv_device *device,
NIR_PASS_V(nir, nir_opt_constant_folding);
NIR_PASS_V(nir, nir_opt_dce);
if (stage == MESA_SHADER_COMPUTE) {
NIR_PASS_V(nir, nir_shader_instructions_pass,
lower_load_ubo_to_uniforms,
nir_metadata_block_index | nir_metadata_dominance,
NULL);
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
nir->num_uniforms = bind_map->push_data_size;
}
union brw_any_prog_key key;
memset(&key, 0, sizeof(key));
@@ -320,6 +381,61 @@ anv_device_init_internal_kernels(struct anv_device *device)
.push_constant = true,
},
},
.push_data_size = sizeof(struct anv_generated_indirect_params),
},
},
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE] = {
.key = {
.name = "anv-copy-query-compute",
},
.stage = MESA_SHADER_COMPUTE,
.spirv_data = query_copy_compute_spv_source,
.spirv_size = ARRAY_SIZE(query_copy_compute_spv_source),
.send_count = device->info->verx10 >= 125 ?
9 /* 4 loads + 4 stores + 1 EOT */ :
8 /* 3 loads + 4 stores + 1 EOT */,
.bind_map = {
.num_bindings = 3,
.bindings = {
{
.address_offset = offsetof(struct anv_query_copy_params,
query_data_addr),
},
{
.address_offset = offsetof(struct anv_query_copy_params,
destination_addr),
},
{
.push_constant = true,
},
},
.push_data_size = sizeof(struct anv_query_copy_params),
},
},
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT] = {
.key = {
.name = "anv-copy-query-fragment",
},
.stage = MESA_SHADER_FRAGMENT,
.spirv_data = query_copy_fragment_spv_source,
.spirv_size = ARRAY_SIZE(query_copy_fragment_spv_source),
.send_count = 8 /* 3 loads + 4 stores + 1 EOT */,
.bind_map = {
.num_bindings = 3,
.bindings = {
{
.address_offset = offsetof(struct anv_query_copy_params,
query_data_addr),
},
{
.address_offset = offsetof(struct anv_query_copy_params,
destination_addr),
},
{
.push_constant = true,
},
},
.push_data_size = sizeof(struct anv_query_copy_params),
},
},
};


@@ -33,7 +33,7 @@
/* This needs to match common_generated_draws.glsl :
*
* layout(set = 0, binding = 2) uniform block
* layout(set = 0, binding = 3) uniform block
*/
struct anv_generated_indirect_draw_params {
/* Draw ID buffer address (only used on Gfx9) */
@@ -84,4 +84,44 @@ struct anv_generated_indirect_params {
struct anv_generated_indirect_params *prev;
};
#define ANV_COPY_QUERY_FLAG_RESULT64 BITFIELD_BIT(0)
#define ANV_COPY_QUERY_FLAG_AVAILABLE BITFIELD_BIT(1)
#define ANV_COPY_QUERY_FLAG_DELTA BITFIELD_BIT(2)
#define ANV_COPY_QUERY_FLAG_PARTIAL BITFIELD_BIT(3)
/* This needs to match common_query_copy.glsl :
*
* layout(set = 0, binding = 2) uniform block
*/
struct anv_query_copy_shader_params {
/* ANV_COPY_QUERY_FLAG_* flags */
uint32_t flags;
/* Number of queries to copy */
uint32_t num_queries;
/* Number of items to write back in the results per query */
uint32_t num_items;
/* First query to copy result from */
uint32_t query_base;
/* Query stride in bytes */
uint32_t query_stride;
/* Offset from which the query data should be read */
uint32_t query_data_offset;
/* Stride of destination writes */
uint32_t destination_stride;
};
struct anv_query_copy_params {
struct anv_query_copy_shader_params copy;
uint64_t query_data_addr;
uint64_t destination_addr;
};
#endif /* ANV_GENERATED_INDIRECT_DRAWS_H */
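
As a point of reference, the ANV_COPY_QUERY_FLAG_* bits line up with the vkCmdCopyQueryPoolResults() flags, except for DELTA which marks query types whose results are stored as begin/end pairs. A minimal sketch of that mapping, with a hypothetical helper name and query_is_delta input (the command-buffer code that actually fills anv_query_copy_params is not part of this commit):

#include <stdbool.h>
#include <stdint.h>
#include <vulkan/vulkan.h>

/* Hypothetical illustration, not part of this commit. */
static uint32_t
example_query_copy_flags(VkQueryResultFlags vk_flags, bool query_is_delta)
{
   uint32_t flags = 0;

   if (vk_flags & VK_QUERY_RESULT_64_BIT)
      flags |= ANV_COPY_QUERY_FLAG_RESULT64;
   if (vk_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      flags |= ANV_COPY_QUERY_FLAG_AVAILABLE;
   if (vk_flags & VK_QUERY_RESULT_PARTIAL_BIT)
      flags |= ANV_COPY_QUERY_FLAG_PARTIAL;
   /* DELTA is not a Vulkan flag; the shader subtracts begin/end pairs
    * when it is set (see compute_delta in common_query_copy.glsl). */
   if (query_is_delta)
      flags |= ANV_COPY_QUERY_FLAG_DELTA;

   return flags;
}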


@@ -1115,6 +1115,8 @@ anv_load_fp64_shader(struct anv_device *device);
enum anv_internal_kernel_name {
ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE,
ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT,
ANV_INTERNAL_KERNEL_COUNT,
};
@@ -1130,6 +1132,7 @@ struct anv_internal_kernel_bind_map {
*/
uint32_t address_offset;
} bindings[5];
uint32_t push_data_size;
};
enum anv_rt_bvh_build_method {


@@ -116,7 +116,7 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']],
_gfx_ver = g[0]
libanv_per_hw_ver_libs += static_library(
'anv_per_hw_ver@0@'.format(_gfx_ver),
[anv_per_hw_ver_files, g[1], anv_entrypoints[0], generated_draws_spvs, ],
[anv_per_hw_ver_files, g[1], anv_entrypoints[0], anv_internal_spvs, ],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
],
@@ -220,7 +220,7 @@ libanv_common = static_library(
[
libanv_files, anv_entrypoints, sha1_h,
gen_xml_pack, float64_spv_h,
generated_draws_spvs,
anv_internal_spvs,
],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,


@@ -0,0 +1,116 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#define BITFIELD_BIT(i) (1u << i)
#define ANV_COPY_QUERY_FLAG_RESULT64 BITFIELD_BIT(0)
#define ANV_COPY_QUERY_FLAG_AVAILABLE BITFIELD_BIT(1)
#define ANV_COPY_QUERY_FLAG_DELTA BITFIELD_BIT(2)
#define ANV_COPY_QUERY_FLAG_PARTIAL BITFIELD_BIT(3)
/* These 3 bindings will be accessed through A64 messages */
layout(set = 0, binding = 0, std430) buffer Storage0 {
uint query_data[];
};
layout(set = 0, binding = 1, std430) buffer Storage1 {
uint destination[];
};
/* This data will be provided through push constants. */
layout(set = 0, binding = 2) uniform block {
uint flags;
uint num_queries;
uint num_items;
uint query_base;
uint query_stride;
uint query_data_offset;
uint destination_stride;
};
void query_copy(uint item_idx)
{
if (item_idx >= num_queries)
return;
bool is_result64 = (flags & ANV_COPY_QUERY_FLAG_RESULT64) != 0;
bool write_available = (flags & ANV_COPY_QUERY_FLAG_AVAILABLE) != 0;
bool compute_delta = (flags & ANV_COPY_QUERY_FLAG_DELTA) != 0;
bool partial_result = (flags & ANV_COPY_QUERY_FLAG_PARTIAL) != 0;
uint query_byte = (query_base + item_idx) * query_stride;
uint query_data_byte = query_byte + query_data_offset;
uint destination_byte = item_idx * destination_stride;
uint64_t availability = query_data[query_byte / 4];
uint query_data_dword = query_data_byte / 4;
uint dest_dword = destination_byte / 4;
for (uint i = 0; i < num_items; i++) {
uint item_data_dword = query_data_dword + i * 2 * (compute_delta ? 2 : 1);
uint64_t v;
if (compute_delta) {
uint64_t v0 = uint64_t(query_data[item_data_dword + 0]) |
(uint64_t(query_data[item_data_dword + 1]) << 32);
uint64_t v1 = uint64_t(query_data[item_data_dword + 2]) |
(uint64_t(query_data[item_data_dword + 3]) << 32);
v = v1 - v0;
} else {
v = uint64_t(query_data[item_data_dword + 0]) |
(uint64_t(query_data[item_data_dword + 1]) << 32);
}
/* vkCmdCopyQueryPoolResults:
*
* "If VK_QUERY_RESULT_PARTIAL_BIT is set, then for any query that is
* unavailable, an intermediate result between zero and the final
* result value is written for that query."
*
* We write 0 for values that have not been written yet; we can't really
* provide any sensible intermediate value.
*/
if (partial_result && availability == 0)
v = 0;
if (is_result64) {
destination[dest_dword + 0] = uint(v & 0xffffffff);
destination[dest_dword + 1] = uint(v >> 32);
dest_dword += 2;
} else {
destination[dest_dword + 0] = uint(v & 0xffffffff);
dest_dword += 1;
}
}
if (write_available) {
if (is_result64) {
destination[dest_dword + 0] = uint(availability & 0xffffffff);
destination[dest_dword + 1] = uint(availability >> 32);
} else {
destination[dest_dword + 0] = uint(availability & 0xffffffff);
}
}
}
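
To make the addressing above easier to follow, here is a host-side mirror of the same dword arithmetic; the stride and data offset are assumed example values (in practice they come from anv_query_copy_params), and the helper name is hypothetical:

#include <stdbool.h>
#include <stdint.h>

/* Illustration only: mirrors the shader's item_data_dword computation. */
static uint32_t
example_item_data_dword(uint32_t query_base, uint32_t item_idx,
                        uint32_t item, bool compute_delta)
{
   const uint32_t query_stride = 64;     /* assumed slot size in bytes */
   const uint32_t query_data_offset = 8; /* assumed: skip a 64-bit availability word */

   uint32_t query_byte = (query_base + item_idx) * query_stride;
   uint32_t query_data_dword = (query_byte + query_data_offset) / 4;

   /* With DELTA each item is a begin/end pair of 64-bit values (4 dwords),
    * otherwise a single 64-bit value (2 dwords). */
   return query_data_dword + item * 2 * (compute_delta ? 2 : 1);
}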


@@ -32,25 +32,30 @@ float64_spv_h = custom_target(
]
)
generated_draws_shaders = [
'gfx9_generated_draws.glsl',
'gfx11_generated_draws.glsl',
anv_internal_shaders = [
[ 'gfx9_generated_draws.glsl', 'frag' ],
[ 'gfx11_generated_draws.glsl', 'frag' ],
[ 'query_copy_compute.glsl', 'comp' ],
[ 'query_copy_fragment.glsl', 'frag' ],
]
generated_draws_spvs = []
foreach f : generated_draws_shaders
anv_internal_spvs = []
foreach item : anv_internal_shaders
f = item[0]
stage = item[1]
spv_filename = f.replace('.glsl', '_spv.h')
src_name = f.replace('.glsl', '_spv_source')
generated_draws_spvs += custom_target(
anv_internal_spvs += custom_target(
spv_filename,
input : [glsl2spirv, f, files('common_generated_draws.glsl')],
input : [glsl2spirv, f, files('common_generated_draws.glsl',
'common_query_copy.glsl')],
output : spv_filename,
command : [
prog_python, '@INPUT0@', '@INPUT1@', '@OUTPUT@',
prog_glslang,
'--vn', src_name,
'--glsl-version', '450',
'--stage', 'frag',
'--stage', stage,
'-I' + meson.current_source_dir(),
])
endforeach


@@ -0,0 +1,35 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_GOOGLE_include_directive : enable
#include "common_query_copy.glsl"
layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;
void main()
{
query_copy(gl_GlobalInvocationID.x);
}
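
The compute variant runs 16 invocations per workgroup and relies on the num_queries bound check in query_copy() to drop excess invocations, so a dispatch along the following lines covers the whole copy; this is a sketch with an assumed helper name, not the driver's actual dispatch code:

#include <stdint.h>

/* Illustration only: one invocation per query, rounded up to whole
 * workgroups of local_size_x = 16. */
static uint32_t
example_copy_query_group_count_x(uint32_t num_queries)
{
   return (num_queries + 15) / 16;
}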


@@ -0,0 +1,33 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_GOOGLE_include_directive : enable
#include "common_query_copy.glsl"
void main()
{
query_copy(uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x));
}
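
The fragment variant derives the item index as y * 8192 + x, i.e. one query per covered pixel with an implicit maximum width of 8192. A rectangle sized roughly as below would cover num_queries items, with the bound check in query_copy() discarding the excess fragments on the last row; the helper name and sizing are an assumption, not taken from this commit:

#include <stdint.h>

/* Illustration only: pick a rectangle large enough to cover num_queries
 * pixels at the shader's fixed 8192-pixel row stride. */
static void
example_copy_query_rect(uint32_t num_queries,
                        uint32_t *width, uint32_t *height)
{
   *width = num_queries < 8192 ? num_queries : 8192;
   *height = (num_queries + 8191) / 8192;
}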