nvk: rewrite query copy shader in CL C
as previously discussed. this is using "library CL" instead of kernel CL, which is the older way of doing things. it works, it just has more boilerplate per-kernel than we'd want otherwise. but library CL is basically free to integrate into a driver, whereas kernel CL requires a lot more upfront investment. (I'm working on cleaning that up but we're not quite there yet.) Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Mel Henning <mhenning@darkrefraction.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33362>
This commit is contained in:

committed by
Marge Bot

parent
c249556bf4
commit
feedd427b3
@@ -834,6 +834,7 @@ with_driver_using_cl = [
|
||||
with_gallium_iris, with_intel_vk,
|
||||
with_gallium_asahi, with_asahi_vk,
|
||||
with_gallium_panfrost, with_panfrost_vk,
|
||||
with_nouveau_vk,
|
||||
].contains(true)
|
||||
|
||||
if get_option('mesa-clc') == 'system'
|
||||
|
53
src/nouveau/vulkan/cl/nvk_query.cl
Normal file
53
src/nouveau/vulkan/cl/nvk_query.cl
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright © 2022 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "compiler/libcl/libcl_vk.h"
|
||||
#include "nvk_query.h"
|
||||
|
||||
void
|
||||
nvk_copy_queries(uint64_t pool_addr, uint query_start, uint query_stride,
|
||||
uint first_query, uint query_count, uint64_t dst_addr,
|
||||
uint64_t dst_stride, uint flags)
|
||||
{
|
||||
uint i = get_sub_group_local_id() + cl_group_id.x * 32;
|
||||
if (i >= query_count)
|
||||
return;
|
||||
|
||||
uint query = first_query + i;
|
||||
global uint *available_arr = (global uint *)pool_addr;
|
||||
bool available = available_arr[query] != 0;
|
||||
bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);
|
||||
|
||||
uint64_t report_offs = query_start + (uint64_t)query * (uint64_t)query_stride;
|
||||
global struct nvk_query_report *report =
|
||||
(global void *)(pool_addr + report_offs);
|
||||
|
||||
uint64_t dst_offset = dst_stride * (uint64_t)i;
|
||||
uint num_reports = 1;
|
||||
|
||||
if (query_stride == sizeof(struct nvk_query_report)) {
|
||||
/* Timestamp queries are the only ones use a single report */
|
||||
if (write_results) {
|
||||
vk_write_query(dst_addr + dst_offset, 0, flags, report->timestamp);
|
||||
}
|
||||
} else {
|
||||
/* Everything that isn't a timestamp has the invariant that the
|
||||
* number of destination entries is equal to the query stride divided
|
||||
* by the size of two reports.
|
||||
*/
|
||||
num_reports = query_stride / (2 * sizeof(struct nvk_query_report));
|
||||
|
||||
if (write_results) {
|
||||
for (uint r = 0; r < num_reports; ++r) {
|
||||
uint delta = report[(r * 2) + 1].value - report[r * 2].value;
|
||||
|
||||
vk_write_query(dst_addr + dst_offset, r, flags, delta);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
|
||||
vk_write_query(dst_addr + dst_offset, num_reports, flags, available);
|
||||
}
|
||||
}
|
12
src/nouveau/vulkan/cl/nvk_query.h
Normal file
12
src/nouveau/vulkan/cl/nvk_query.h
Normal file
@@ -0,0 +1,12 @@
|
||||
/*
|
||||
* Copyright © 2022 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "compiler/libcl/libcl.h"
|
||||
|
||||
struct nvk_query_report {
|
||||
uint64_t value;
|
||||
uint64_t timestamp;
|
||||
};
|
@@ -75,6 +75,10 @@ nvk_files = files(
|
||||
'nvkmd/nvkmd.h',
|
||||
)
|
||||
|
||||
nvkcl_files = files(
|
||||
'cl/nvk_query.cl',
|
||||
)
|
||||
|
||||
nvk_entrypoints = custom_target(
|
||||
'nvk_entrypoints',
|
||||
input : [vk_entrypoints_gen, vk_api_xml],
|
||||
@@ -87,6 +91,26 @@ nvk_entrypoints = custom_target(
|
||||
depend_files : vk_entrypoints_gen_depend_files,
|
||||
)
|
||||
|
||||
relative_dir = fs.relative_to(meson.global_source_root(), meson.global_build_root())
|
||||
|
||||
nvkcl_spv = custom_target(
|
||||
input : nvkcl_files,
|
||||
output : 'nvkcl.spv',
|
||||
command : [
|
||||
prog_mesa_clc, '-o', '@OUTPUT@', '--depfile', '@DEPFILE@', nvkcl_files, '--',
|
||||
'-I' + join_paths(meson.current_source_dir(), '.'),
|
||||
'-I' + join_paths(meson.project_source_root(), 'src'),
|
||||
'-fmacro-prefix-map=@0@/='.format(relative_dir),
|
||||
],
|
||||
depfile : '@PLAINNAME@.d',
|
||||
)
|
||||
|
||||
nvkcl = custom_target(
|
||||
input : nvkcl_spv,
|
||||
output : ['nvkcl.cpp', 'nvkcl.h'],
|
||||
command : [prog_vtn_bindgen2, '@INPUT@', '@OUTPUT@'],
|
||||
)
|
||||
|
||||
nvk_deps = [
|
||||
dep_libdrm,
|
||||
idep_nak,
|
||||
@@ -116,6 +140,7 @@ libnvk = static_library(
|
||||
nvk_entrypoints,
|
||||
cl_generated,
|
||||
nvk_files,
|
||||
nvkcl,
|
||||
sha1_h,
|
||||
],
|
||||
include_directories : [
|
||||
|
@@ -17,8 +17,10 @@
|
||||
#include "vk_meta.h"
|
||||
#include "vk_pipeline.h"
|
||||
|
||||
#include "cl/nvk_query.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "nvkcl.h"
|
||||
|
||||
#include "util/os_time.h"
|
||||
|
||||
@@ -27,11 +29,6 @@
|
||||
#include "nv_push_cla0c0.h"
|
||||
#include "nv_push_clc597.h"
|
||||
|
||||
struct nvk_query_report {
|
||||
uint64_t value;
|
||||
uint64_t timestamp;
|
||||
};
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
nvk_CreateQueryPool(VkDevice device,
|
||||
const VkQueryPoolCreateInfo *pCreateInfo,
|
||||
@@ -118,14 +115,6 @@ nvk_query_available_addr(struct nvk_query_pool *pool, uint32_t query)
|
||||
return pool->mem->va->addr + query * sizeof(uint32_t);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
nvk_nir_available_addr(nir_builder *b, nir_def *pool_addr,
|
||||
nir_def *query)
|
||||
{
|
||||
nir_def *offset = nir_imul_imm(b, query, sizeof(uint32_t));
|
||||
return nir_iadd(b, pool_addr, nir_u2u64(b, offset));
|
||||
}
|
||||
|
||||
static uint32_t *
|
||||
nvk_query_available_map(struct nvk_query_pool *pool, uint32_t query)
|
||||
{
|
||||
@@ -146,16 +135,6 @@ nvk_query_report_addr(struct nvk_query_pool *pool, uint32_t query)
|
||||
return pool->mem->va->addr + nvk_query_offset(pool, query);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
nvk_nir_query_report_addr(nir_builder *b, nir_def *pool_addr,
|
||||
nir_def *query_start, nir_def *query_stride,
|
||||
nir_def *query)
|
||||
{
|
||||
nir_def *offset =
|
||||
nir_iadd(b, query_start, nir_umul_2x32_64(b, query, query_stride));
|
||||
return nir_iadd(b, pool_addr, offset);
|
||||
}
|
||||
|
||||
static struct nvk_query_report *
|
||||
nvk_query_report_map(struct nvk_query_pool *pool, uint32_t query)
|
||||
{
|
||||
@@ -715,135 +694,6 @@ load_struct_var(nir_builder *b, nir_variable *var, uint32_t field)
|
||||
return nir_load_deref(b, deref);
|
||||
}
|
||||
|
||||
static void
|
||||
nir_write_query_result(nir_builder *b, nir_def *dst_addr,
|
||||
nir_def *idx, nir_def *flags,
|
||||
nir_def *result)
|
||||
{
|
||||
assert(result->num_components == 1);
|
||||
assert(result->bit_size == 64);
|
||||
|
||||
nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_64_BIT));
|
||||
{
|
||||
nir_def *offset = nir_i2i64(b, nir_imul_imm(b, idx, 8));
|
||||
nir_store_global(b, nir_iadd(b, dst_addr, offset), 8, result, 0x1);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_def *result32 = nir_u2u32(b, result);
|
||||
nir_def *offset = nir_i2i64(b, nir_imul_imm(b, idx, 4));
|
||||
nir_store_global(b, nir_iadd(b, dst_addr, offset), 4, result32, 0x1);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
nir_get_query_delta(nir_builder *b, nir_def *dst_addr,
|
||||
nir_def *report_addr, nir_def *idx,
|
||||
nir_def *flags)
|
||||
{
|
||||
nir_def *offset =
|
||||
nir_imul_imm(b, idx, 2 * sizeof(struct nvk_query_report));
|
||||
nir_def *begin_addr =
|
||||
nir_iadd(b, report_addr, nir_i2i64(b, offset));
|
||||
nir_def *end_addr =
|
||||
nir_iadd_imm(b, begin_addr, sizeof(struct nvk_query_report));
|
||||
|
||||
/* nvk_query_report::timestamp is the first uint64_t */
|
||||
nir_def *begin = nir_load_global(b, begin_addr, 16, 1, 64);
|
||||
nir_def *end = nir_load_global(b, end_addr, 16, 1, 64);
|
||||
|
||||
nir_def *delta = nir_isub(b, end, begin);
|
||||
|
||||
nir_write_query_result(b, dst_addr, idx, flags, delta);
|
||||
}
|
||||
|
||||
static void
|
||||
nvk_nir_copy_query(nir_builder *b, nir_variable *push, nir_def *i)
|
||||
{
|
||||
nir_def *pool_addr = load_struct_var(b, push, 0);
|
||||
nir_def *query_start = nir_u2u64(b, load_struct_var(b, push, 1));
|
||||
nir_def *query_stride = load_struct_var(b, push, 2);
|
||||
nir_def *first_query = load_struct_var(b, push, 3);
|
||||
nir_def *dst_addr = load_struct_var(b, push, 5);
|
||||
nir_def *dst_stride = load_struct_var(b, push, 6);
|
||||
nir_def *flags = load_struct_var(b, push, 7);
|
||||
|
||||
nir_def *query = nir_iadd(b, first_query, i);
|
||||
|
||||
nir_def *avail_addr = nvk_nir_available_addr(b, pool_addr, query);
|
||||
nir_def *available =
|
||||
nir_i2b(b, nir_load_global(b, avail_addr, 4, 1, 32));
|
||||
|
||||
nir_def *partial = nir_test_mask(b, flags, VK_QUERY_RESULT_PARTIAL_BIT);
|
||||
nir_def *write_results = nir_ior(b, available, partial);
|
||||
|
||||
nir_def *report_addr =
|
||||
nvk_nir_query_report_addr(b, pool_addr, query_start, query_stride,
|
||||
query);
|
||||
nir_def *dst_offset = nir_imul(b, nir_u2u64(b, i), dst_stride);
|
||||
|
||||
/* Timestamp queries are the only ones use a single report */
|
||||
nir_def *is_timestamp =
|
||||
nir_ieq_imm(b, query_stride, sizeof(struct nvk_query_report));
|
||||
|
||||
nir_def *one = nir_imm_int(b, 1);
|
||||
nir_def *num_reports;
|
||||
nir_push_if(b, is_timestamp);
|
||||
{
|
||||
nir_push_if(b, write_results);
|
||||
{
|
||||
/* This is the timestamp case. We add 8 because we're loading
|
||||
* nvk_query_report::timestamp.
|
||||
*/
|
||||
nir_def *timestamp =
|
||||
nir_load_global(b, nir_iadd_imm(b, report_addr, 8), 8, 1, 64);
|
||||
|
||||
nir_write_query_result(b, nir_iadd(b, dst_addr, dst_offset),
|
||||
nir_imm_int(b, 0), flags, timestamp);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
/* Everything that isn't a timestamp has the invariant that the
|
||||
* number of destination entries is equal to the query stride divided
|
||||
* by the size of two reports.
|
||||
*/
|
||||
num_reports = nir_udiv_imm(b, query_stride,
|
||||
2 * sizeof(struct nvk_query_report));
|
||||
|
||||
nir_push_if(b, write_results);
|
||||
{
|
||||
nir_variable *r =
|
||||
nir_local_variable_create(b->impl, glsl_uint_type(), "r");
|
||||
nir_store_var(b, r, nir_imm_int(b, 0), 0x1);
|
||||
|
||||
nir_push_loop(b);
|
||||
{
|
||||
nir_break_if(b, nir_ige(b, nir_load_var(b, r), num_reports));
|
||||
|
||||
nir_get_query_delta(b, nir_iadd(b, dst_addr, dst_offset),
|
||||
report_addr, nir_load_var(b, r), flags);
|
||||
|
||||
nir_store_var(b, r, nir_iadd_imm(b, nir_load_var(b, r), 1), 0x1);
|
||||
}
|
||||
nir_pop_loop(b, NULL);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
num_reports = nir_if_phi(b, one, num_reports);
|
||||
|
||||
nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
|
||||
{
|
||||
nir_write_query_result(b, nir_iadd(b, dst_addr, dst_offset),
|
||||
num_reports, flags, nir_b2i64(b, available));
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
static nir_shader *
|
||||
build_copy_queries_shader(void)
|
||||
{
|
||||
@@ -870,16 +720,11 @@ build_copy_queries_shader(void)
|
||||
push_iface_type, "push");
|
||||
|
||||
b->shader->info.workgroup_size[0] = 32;
|
||||
nir_def *wg_id = nir_load_workgroup_id(b);
|
||||
nir_def *i = nir_iadd(b, nir_load_subgroup_invocation(b),
|
||||
nir_imul_imm(b, nir_channel(b, wg_id, 0), 32));
|
||||
|
||||
nir_def *query_count = load_struct_var(b, push, 4);
|
||||
nir_push_if(b, nir_ilt(b, i, query_count));
|
||||
{
|
||||
nvk_nir_copy_query(b, push, i);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
nvk_copy_queries(b, load_struct_var(b, push, 0), load_struct_var(b, push, 1),
|
||||
load_struct_var(b, push, 2), load_struct_var(b, push, 3),
|
||||
load_struct_var(b, push, 4), load_struct_var(b, push, 5),
|
||||
load_struct_var(b, push, 6), load_struct_var(b, push, 7));
|
||||
|
||||
return build.shader;
|
||||
}
|
||||
|
Reference in New Issue
Block a user