diff --git a/meson.build b/meson.build index 07991a6eb5e..dfae7eada45 100644 --- a/meson.build +++ b/meson.build @@ -834,6 +834,7 @@ with_driver_using_cl = [ with_gallium_iris, with_intel_vk, with_gallium_asahi, with_asahi_vk, with_gallium_panfrost, with_panfrost_vk, + with_nouveau_vk, ].contains(true) if get_option('mesa-clc') == 'system' diff --git a/src/nouveau/vulkan/cl/nvk_query.cl b/src/nouveau/vulkan/cl/nvk_query.cl new file mode 100644 index 00000000000..b4371b2da09 --- /dev/null +++ b/src/nouveau/vulkan/cl/nvk_query.cl @@ -0,0 +1,53 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * SPDX-License-Identifier: MIT + */ +#include "compiler/libcl/libcl_vk.h" +#include "nvk_query.h" + +void +nvk_copy_queries(uint64_t pool_addr, uint query_start, uint query_stride, + uint first_query, uint query_count, uint64_t dst_addr, + uint64_t dst_stride, uint flags) +{ + uint i = get_sub_group_local_id() + cl_group_id.x * 32; + if (i >= query_count) + return; + + uint query = first_query + i; + global uint *available_arr = (global uint *)pool_addr; + bool available = available_arr[query] != 0; + bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT); + + uint64_t report_offs = query_start + (uint64_t)query * (uint64_t)query_stride; + global struct nvk_query_report *report = + (global void *)(pool_addr + report_offs); + + uint64_t dst_offset = dst_stride * (uint64_t)i; + uint num_reports = 1; + + if (query_stride == sizeof(struct nvk_query_report)) { + /* Timestamp queries are the only ones use a single report */ + if (write_results) { + vk_write_query(dst_addr + dst_offset, 0, flags, report->timestamp); + } + } else { + /* Everything that isn't a timestamp has the invariant that the + * number of destination entries is equal to the query stride divided + * by the size of two reports. + */ + num_reports = query_stride / (2 * sizeof(struct nvk_query_report)); + + if (write_results) { + for (uint r = 0; r < num_reports; ++r) { + uint delta = report[(r * 2) + 1].value - report[r * 2].value; + + vk_write_query(dst_addr + dst_offset, r, flags, delta); + } + } + } + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + vk_write_query(dst_addr + dst_offset, num_reports, flags, available); + } +} diff --git a/src/nouveau/vulkan/cl/nvk_query.h b/src/nouveau/vulkan/cl/nvk_query.h new file mode 100644 index 00000000000..ca917047251 --- /dev/null +++ b/src/nouveau/vulkan/cl/nvk_query.h @@ -0,0 +1,12 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * SPDX-License-Identifier: MIT + */ +#pragma once + +#include "compiler/libcl/libcl.h" + +struct nvk_query_report { + uint64_t value; + uint64_t timestamp; +}; diff --git a/src/nouveau/vulkan/meson.build b/src/nouveau/vulkan/meson.build index 16e6cc6c581..e20384cd8fc 100644 --- a/src/nouveau/vulkan/meson.build +++ b/src/nouveau/vulkan/meson.build @@ -75,6 +75,10 @@ nvk_files = files( 'nvkmd/nvkmd.h', ) +nvkcl_files = files( + 'cl/nvk_query.cl', +) + nvk_entrypoints = custom_target( 'nvk_entrypoints', input : [vk_entrypoints_gen, vk_api_xml], @@ -87,6 +91,26 @@ nvk_entrypoints = custom_target( depend_files : vk_entrypoints_gen_depend_files, ) +relative_dir = fs.relative_to(meson.global_source_root(), meson.global_build_root()) + +nvkcl_spv = custom_target( + input : nvkcl_files, + output : 'nvkcl.spv', + command : [ + prog_mesa_clc, '-o', '@OUTPUT@', '--depfile', '@DEPFILE@', nvkcl_files, '--', + '-I' + join_paths(meson.current_source_dir(), '.'), + '-I' + join_paths(meson.project_source_root(), 'src'), + '-fmacro-prefix-map=@0@/='.format(relative_dir), + ], + depfile : '@PLAINNAME@.d', +) + +nvkcl = custom_target( + input : nvkcl_spv, + output : ['nvkcl.cpp', 'nvkcl.h'], + command : [prog_vtn_bindgen2, '@INPUT@', '@OUTPUT@'], +) + nvk_deps = [ dep_libdrm, idep_nak, @@ -116,6 +140,7 @@ libnvk = static_library( nvk_entrypoints, cl_generated, nvk_files, + nvkcl, sha1_h, ], include_directories : [ diff --git a/src/nouveau/vulkan/nvk_query_pool.c b/src/nouveau/vulkan/nvk_query_pool.c index b2ff64b3ae4..fb13b661417 100644 --- a/src/nouveau/vulkan/nvk_query_pool.c +++ b/src/nouveau/vulkan/nvk_query_pool.c @@ -17,8 +17,10 @@ #include "vk_meta.h" #include "vk_pipeline.h" +#include "cl/nvk_query.h" #include "compiler/nir/nir.h" #include "compiler/nir/nir_builder.h" +#include "nvkcl.h" #include "util/os_time.h" @@ -27,11 +29,6 @@ #include "nv_push_cla0c0.h" #include "nv_push_clc597.h" -struct nvk_query_report { - uint64_t value; - uint64_t timestamp; -}; - VKAPI_ATTR VkResult VKAPI_CALL nvk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo, @@ -118,14 +115,6 @@ nvk_query_available_addr(struct nvk_query_pool *pool, uint32_t query) return pool->mem->va->addr + query * sizeof(uint32_t); } -static nir_def * -nvk_nir_available_addr(nir_builder *b, nir_def *pool_addr, - nir_def *query) -{ - nir_def *offset = nir_imul_imm(b, query, sizeof(uint32_t)); - return nir_iadd(b, pool_addr, nir_u2u64(b, offset)); -} - static uint32_t * nvk_query_available_map(struct nvk_query_pool *pool, uint32_t query) { @@ -146,16 +135,6 @@ nvk_query_report_addr(struct nvk_query_pool *pool, uint32_t query) return pool->mem->va->addr + nvk_query_offset(pool, query); } -static nir_def * -nvk_nir_query_report_addr(nir_builder *b, nir_def *pool_addr, - nir_def *query_start, nir_def *query_stride, - nir_def *query) -{ - nir_def *offset = - nir_iadd(b, query_start, nir_umul_2x32_64(b, query, query_stride)); - return nir_iadd(b, pool_addr, offset); -} - static struct nvk_query_report * nvk_query_report_map(struct nvk_query_pool *pool, uint32_t query) { @@ -715,135 +694,6 @@ load_struct_var(nir_builder *b, nir_variable *var, uint32_t field) return nir_load_deref(b, deref); } -static void -nir_write_query_result(nir_builder *b, nir_def *dst_addr, - nir_def *idx, nir_def *flags, - nir_def *result) -{ - assert(result->num_components == 1); - assert(result->bit_size == 64); - - nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_64_BIT)); - { - nir_def *offset = nir_i2i64(b, nir_imul_imm(b, idx, 8)); - nir_store_global(b, nir_iadd(b, dst_addr, offset), 8, result, 0x1); - } - nir_push_else(b, NULL); - { - nir_def *result32 = nir_u2u32(b, result); - nir_def *offset = nir_i2i64(b, nir_imul_imm(b, idx, 4)); - nir_store_global(b, nir_iadd(b, dst_addr, offset), 4, result32, 0x1); - } - nir_pop_if(b, NULL); -} - -static void -nir_get_query_delta(nir_builder *b, nir_def *dst_addr, - nir_def *report_addr, nir_def *idx, - nir_def *flags) -{ - nir_def *offset = - nir_imul_imm(b, idx, 2 * sizeof(struct nvk_query_report)); - nir_def *begin_addr = - nir_iadd(b, report_addr, nir_i2i64(b, offset)); - nir_def *end_addr = - nir_iadd_imm(b, begin_addr, sizeof(struct nvk_query_report)); - - /* nvk_query_report::timestamp is the first uint64_t */ - nir_def *begin = nir_load_global(b, begin_addr, 16, 1, 64); - nir_def *end = nir_load_global(b, end_addr, 16, 1, 64); - - nir_def *delta = nir_isub(b, end, begin); - - nir_write_query_result(b, dst_addr, idx, flags, delta); -} - -static void -nvk_nir_copy_query(nir_builder *b, nir_variable *push, nir_def *i) -{ - nir_def *pool_addr = load_struct_var(b, push, 0); - nir_def *query_start = nir_u2u64(b, load_struct_var(b, push, 1)); - nir_def *query_stride = load_struct_var(b, push, 2); - nir_def *first_query = load_struct_var(b, push, 3); - nir_def *dst_addr = load_struct_var(b, push, 5); - nir_def *dst_stride = load_struct_var(b, push, 6); - nir_def *flags = load_struct_var(b, push, 7); - - nir_def *query = nir_iadd(b, first_query, i); - - nir_def *avail_addr = nvk_nir_available_addr(b, pool_addr, query); - nir_def *available = - nir_i2b(b, nir_load_global(b, avail_addr, 4, 1, 32)); - - nir_def *partial = nir_test_mask(b, flags, VK_QUERY_RESULT_PARTIAL_BIT); - nir_def *write_results = nir_ior(b, available, partial); - - nir_def *report_addr = - nvk_nir_query_report_addr(b, pool_addr, query_start, query_stride, - query); - nir_def *dst_offset = nir_imul(b, nir_u2u64(b, i), dst_stride); - - /* Timestamp queries are the only ones use a single report */ - nir_def *is_timestamp = - nir_ieq_imm(b, query_stride, sizeof(struct nvk_query_report)); - - nir_def *one = nir_imm_int(b, 1); - nir_def *num_reports; - nir_push_if(b, is_timestamp); - { - nir_push_if(b, write_results); - { - /* This is the timestamp case. We add 8 because we're loading - * nvk_query_report::timestamp. - */ - nir_def *timestamp = - nir_load_global(b, nir_iadd_imm(b, report_addr, 8), 8, 1, 64); - - nir_write_query_result(b, nir_iadd(b, dst_addr, dst_offset), - nir_imm_int(b, 0), flags, timestamp); - } - nir_pop_if(b, NULL); - } - nir_push_else(b, NULL); - { - /* Everything that isn't a timestamp has the invariant that the - * number of destination entries is equal to the query stride divided - * by the size of two reports. - */ - num_reports = nir_udiv_imm(b, query_stride, - 2 * sizeof(struct nvk_query_report)); - - nir_push_if(b, write_results); - { - nir_variable *r = - nir_local_variable_create(b->impl, glsl_uint_type(), "r"); - nir_store_var(b, r, nir_imm_int(b, 0), 0x1); - - nir_push_loop(b); - { - nir_break_if(b, nir_ige(b, nir_load_var(b, r), num_reports)); - - nir_get_query_delta(b, nir_iadd(b, dst_addr, dst_offset), - report_addr, nir_load_var(b, r), flags); - - nir_store_var(b, r, nir_iadd_imm(b, nir_load_var(b, r), 1), 0x1); - } - nir_pop_loop(b, NULL); - } - nir_pop_if(b, NULL); - } - nir_pop_if(b, NULL); - - num_reports = nir_if_phi(b, one, num_reports); - - nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)); - { - nir_write_query_result(b, nir_iadd(b, dst_addr, dst_offset), - num_reports, flags, nir_b2i64(b, available)); - } - nir_pop_if(b, NULL); -} - static nir_shader * build_copy_queries_shader(void) { @@ -870,16 +720,11 @@ build_copy_queries_shader(void) push_iface_type, "push"); b->shader->info.workgroup_size[0] = 32; - nir_def *wg_id = nir_load_workgroup_id(b); - nir_def *i = nir_iadd(b, nir_load_subgroup_invocation(b), - nir_imul_imm(b, nir_channel(b, wg_id, 0), 32)); - nir_def *query_count = load_struct_var(b, push, 4); - nir_push_if(b, nir_ilt(b, i, query_count)); - { - nvk_nir_copy_query(b, push, i); - } - nir_pop_if(b, NULL); + nvk_copy_queries(b, load_struct_var(b, push, 0), load_struct_var(b, push, 1), + load_struct_var(b, push, 2), load_struct_var(b, push, 3), + load_struct_var(b, push, 4), load_struct_var(b, push, 5), + load_struct_var(b, push, 6), load_struct_var(b, push, 7)); return build.shader; }