anv: Implement acceleration structure API
Rework (Kevin):
- Properly set up bvh_layout. Our BVH resides in contiguous memory and can
  be divided into two sections:
  1. anv_accel_struct_header, tightly followed by
  2. the actual BVH, which starts with the root node, followed by
     interleaved leaves and internal nodes.
- Update comments for some fields of the BVH and nodes.
- Properly populate the UUIDs in the serialization header.
- Separate the header function into two completely distinct paths based on
  the compaction bit.
- Encode rt_uuid at the second VK_UUID_SIZE.
- Write the query result at the correct slot.
- Add an assertion for 4B alignment.
- Move bvh_layout to anv_bvh.
- Use a meson option to decide which files to compile.
- The alignment of the serialization size is not needed.
- Change static_assert to STATIC_ASSERT and move them inside functions.

Rework (Sagar):
- Use anv_cmd_buffer_update_buffer instead of MI to copy data.

Rework (Lionel):
- Remove the flush after builds, and add a flush in copy before dispatch.
- Handle the flushes in CmdWriteAccelerationStructuresPropertiesKHR properly.

Co-authored-by: Kevin Chuang <kaiwenjon23@gmail.com>
Co-authored-by: Sagar Ghuge <sagar.ghuge@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31588>
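To make the contiguous layout described above concrete, below is a small, self-contained sketch of the offset computation. It mirrors the shape of get_bvh_layout() added later in this patch, but the SKETCH_* constants and names are illustrative placeholders, not the driver's actual ANV_RT_* values.

/* Minimal sketch of the contiguous BVH layout from the commit message:
 * an acceleration-structure header, an optional table of leaf addresses
 * for a TLAS, then the 64-byte-aligned node section (internal nodes
 * followed by leaves). The SKETCH_* sizes are placeholders, not the real
 * ANV_RT_* values.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SKETCH_HEADER_SIZE        128 /* stand-in for ANV_RT_BVH_HEADER_SIZE */
#define SKETCH_INTERNAL_NODE_SIZE  64 /* stand-in for ANV_RT_INTERNAL_NODE_SIZE */
#define SKETCH_LEAF_SIZE           64 /* stand-in for a leaf node size */

#define SKETCH_ALIGN(v, a) (((v) + (a) - 1) & ~(uint64_t)((a) - 1))
#define SKETCH_MAX2(a, b)  ((a) > (b) ? (a) : (b))

struct sketch_bvh_layout {
   uint64_t bvh_offset; /* where the root/internal nodes start */
   uint64_t size;       /* total size of the acceleration structure */
};

static void
sketch_get_bvh_layout(bool is_tlas, uint32_t leaf_count,
                      struct sketch_bvh_layout *layout)
{
   /* A binary tree with N leaves has at most N - 1 internal nodes. */
   uint32_t internal_count = SKETCH_MAX2(leaf_count, 2) - 1;

   uint64_t offset = SKETCH_HEADER_SIZE;

   /* For a TLAS, a per-instance 64-bit leaf address follows the header so
    * the copy shader can find the instance leaves quickly.
    */
   if (is_tlas)
      offset += leaf_count * sizeof(uint64_t);

   /* RT nodes want 64-byte alignment, so align the start of the node
    * section.
    */
   offset = SKETCH_ALIGN(offset, 64);
   layout->bvh_offset = offset;

   /* Internal nodes first, then one leaf per primitive/instance. */
   offset += internal_count * SKETCH_INTERNAL_NODE_SIZE;
   offset += leaf_count * SKETCH_LEAF_SIZE;

   layout->size = offset;
}

int
main(void)
{
   struct sketch_bvh_layout layout;
   sketch_get_bvh_layout(true /* TLAS */, 100 /* instances */, &layout);
   printf("bvh_offset=%llu size=%llu\n",
          (unsigned long long)layout.bvh_offset,
          (unsigned long long)layout.size);
   return 0;
}

In the real driver the leaf size depends on the geometry type (quad leaves for triangles, procedural leaves for AABBs, instance leaves for a TLAS), which is exactly what get_bvh_layout() in the diff below switches on.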
@@ -316,6 +316,10 @@ with_intel_vk_rt = get_option('intel-rt') \
  .disable_if(host_machine.cpu_family() != 'x86_64', error_message : 'Intel Ray Tracing is only supported on x86_64') \
  .allowed()

if with_intel_vk_rt
  with_intel_bvh_grl = get_option('intel-bvh-grl')
endif

with_any_intel = [
  with_gallium_crocus,
  with_gallium_i915,
@@ -657,6 +657,13 @@ option(
  description : 'Build the intel-clc compiler or use a system version.'
)

option(
  'intel-bvh-grl',
  type : 'boolean',
  value : true,
  description : 'Build the BVH structure using GRL.'
)

option(
  'install-intel-clc',
  type : 'boolean',
@@ -985,14 +985,22 @@ VkResult anv_CreateDevice(

   anv_device_utrace_init(device);

   result = anv_genX(device->info, init_device_state)(device);
   result = vk_meta_device_init(&device->vk, &device->meta_device);
   if (result != VK_SUCCESS)
      goto fail_utrace;

   result = anv_genX(device->info, init_device_state)(device);
   if (result != VK_SUCCESS)
      goto fail_meta_device;

   simple_mtx_init(&device->accel_struct_build.mutex, mtx_plain);

   *pDevice = anv_device_to_handle(device);

   return VK_SUCCESS;

 fail_meta_device:
   vk_meta_device_finish(&device->vk, &device->meta_device);
 fail_utrace:
   anv_device_utrace_finish(device);
 fail_queues:
@@ -1118,6 +1126,12 @@ void anv_DestroyDevice(
   /* Do TRTT batch garbage collection before destroying queues. */
   anv_device_finish_trtt(device);

   if (device->accel_struct_build.radix_sort) {
      radix_sort_vk_destroy(device->accel_struct_build.radix_sort,
                            _device, &device->vk.alloc);
   }
   vk_meta_device_finish(&device->vk, &device->meta_device);

   anv_device_utrace_finish(device);

   for (uint32_t i = 0; i < device->queue_count; i++)
@@ -1218,6 +1232,8 @@ void anv_DestroyDevice(
   pthread_cond_destroy(&device->queue_submit);
   pthread_mutex_destroy(&device->mutex);

   simple_mtx_destroy(&device->accel_struct_build.mutex);

   ralloc_free(device->fp64_nir);

   anv_device_destroy_context_or_vm(device);
@@ -102,6 +102,7 @@
#include "vk_log.h"
#include "vk_ycbcr_conversion.h"
#include "vk_video.h"
#include "vk_meta.h"

#ifdef __cplusplus
extern "C" {
@@ -1828,6 +1829,11 @@ enum anv_rt_bvh_build_method {
   ANV_BVH_BUILD_METHOD_NEW_SAH,
};

/* If serialization-breaking or algorithm-breaking changes are made,
 * increment the digits at the end
 */
#define ANV_RT_UUID_MACRO "ANV_RT_BVH_0001"

struct anv_device_astc_emu {
   struct vk_texcompress_astc_state *texcompress;
@@ -2102,6 +2108,14 @@ struct anv_device {
       */
      struct util_dynarray prints;
   } printf;

   struct {
      simple_mtx_t mutex;
      struct radix_sort_vk *radix_sort;
      struct vk_acceleration_structure_build_args build_args;
   } accel_struct_build;

   struct vk_meta_device meta_device;
};

static inline uint32_t
src/intel/vulkan/bvh/meson.build (new file, 50 lines)
@@ -0,0 +1,50 @@
# Copyright © 2022 Konstantin Seurer
# Copyright © 2024 Intel Corporation
# SPDX-License-Identifier: MIT

# source file, output name, defines
bvh_shaders = [
  [
    'encode.comp',
    'encode',
    [],
  ],
  [
    'header.comp',
    'header',
    [],
  ],
  [
    'copy.comp',
    'copy',
    []
  ],
]

anv_bvh_include_dir = dir_source_root + '/src/intel/vulkan/bvh'

anv_bvh_includes = files(
  'anv_build_helpers.h',
  'anv_build_interface.h',
  'anv_bvh.h',
)

bvh_spv = []
foreach s : bvh_shaders
  command = [
    prog_glslang, '-V', '-I' + vk_bvh_include_dir, '-I' + anv_bvh_include_dir, '--target-env', 'spirv1.5', '-x', '-o', '@OUTPUT@', '@INPUT@'
  ]
  command += glslang_quiet

  foreach define : s[2]
    command += '-D' + define
  endforeach

  bvh_spv += custom_target(
    s[1] + '.spv.h',
    input : s[0],
    output : s[1] + '.spv.h',
    command : command,
    depend_files: [vk_bvh_includes, anv_bvh_includes],
  )
endforeach
@@ -1,24 +1,5 @@
/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
/* Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "anv_private.h"
@@ -35,8 +16,410 @@

#include "ds/intel_tracepoints.h"

#include "bvh/anv_build_interface.h"
#include "vk_acceleration_structure.h"
#include "radix_sort/radix_sort_u64.h"
#include "radix_sort/common/vk/barrier.h"

#include "vk_common_entrypoints.h"
#include "genX_mi_builder.h"

#if GFX_VERx10 >= 125

static const uint32_t encode_spv[] = {
#include "bvh/encode.spv.h"
};

static const uint32_t header_spv[] = {
#include "bvh/header.spv.h"
};

static const uint32_t copy_spv[] = {
#include "bvh/copy.spv.h"
};

static VkResult
get_pipeline_spv(struct anv_device *device,
                 const char *name, const uint32_t *spv, uint32_t spv_size,
                 unsigned push_constant_size, VkPipeline *pipeline,
                 VkPipelineLayout *layout)
{

   size_t key_size = strlen(name);

   const VkPushConstantRange pc_range = {
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = push_constant_size,
   };

   VkResult result = vk_meta_get_pipeline_layout(&device->vk,
                                                 &device->meta_device, NULL,
                                                 &pc_range, name, key_size,
                                                 layout);

   if (result != VK_SUCCESS)
      return result;

   VkPipeline pipeline_from_cache =
      vk_meta_lookup_pipeline(&device->meta_device, name, key_size);
   if (pipeline_from_cache != VK_NULL_HANDLE) {
      *pipeline = pipeline_from_cache;
      return VK_SUCCESS;
   }

   VkShaderModuleCreateInfo module_info = {
      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
      .pNext = NULL,
      .flags = 0,
      .codeSize = spv_size,
      .pCode = spv,
   };

   VkPipelineShaderStageCreateInfo shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .pNext = &module_info,
      .flags = 0,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .flags = 0,
      .stage = shader_stage,
      .layout = *layout,
   };

   return vk_meta_create_compute_pipeline(&device->vk, &device->meta_device,
                                          &pipeline_info, name, key_size, pipeline);
}

static void
get_bvh_layout(VkGeometryTypeKHR geometry_type, uint32_t leaf_count,
               struct bvh_layout *layout)
{
   uint32_t internal_count = MAX2(leaf_count, 2) - 1;

   uint64_t offset = ANV_RT_BVH_HEADER_SIZE;

   /* For a TLAS, we store the address of each anv_instance_leaf after the
    * header. This is for quick access in copy.comp.
    */
   if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      offset += leaf_count * sizeof(uint64_t);
   }
   /* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
   offset = ALIGN(offset, 64);

   /* This is where internal_nodes/leaves start to be encoded */
   layout->bvh_offset = offset;

   offset += internal_count * ANV_RT_INTERNAL_NODE_SIZE;

   switch (geometry_type) {
   case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
      /* Currently we encode one triangle within one quad leaf */
      offset += leaf_count * ANV_RT_QUAD_LEAF_SIZE;
      break;
   case VK_GEOMETRY_TYPE_AABBS_KHR:
      offset += leaf_count * ANV_RT_PROCEDURAL_LEAF_SIZE;
      break;
   case VK_GEOMETRY_TYPE_INSTANCES_KHR:
      offset += leaf_count * ANV_RT_INSTANCE_LEAF_SIZE;
      break;
   default:
      unreachable("Unknown VkGeometryTypeKHR");
   }

   layout->size = offset;
}

static VkDeviceSize
anv_get_as_size(VkDevice device,
                const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
                uint32_t leaf_count)
{
   struct bvh_layout layout;
   get_bvh_layout(vk_get_as_geometry_type(pBuildInfo), leaf_count, &layout);
   return layout.size;
}

static uint32_t
anv_get_encode_key(VkAccelerationStructureTypeKHR type,
                   VkBuildAccelerationStructureFlagBitsKHR flags)
{
   return 0;
}

static VkResult
anv_encode_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_device *device = cmd_buffer->device;

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result = get_pipeline_spv(device, "encode", encode_spv,
                                      sizeof(encode_spv),
                                      sizeof(struct encode_args), &pipeline,
                                      &layout);
   if (result != VK_SUCCESS)
      return result;

   anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                       pipeline);

   return VK_SUCCESS;
}

static void
anv_encode_as(VkCommandBuffer commandBuffer,
              const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
              const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
              VkDeviceAddress intermediate_as_addr,
              VkDeviceAddress intermediate_header_addr, uint32_t leaf_count,
              uint32_t key,
              struct vk_acceleration_structure *dst)
{
   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_device *device = cmd_buffer->device;

   VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);

   VkPipeline pipeline;
   VkPipelineLayout layout;
   get_pipeline_spv(device, "encode", encode_spv, sizeof(encode_spv),
                    sizeof(struct encode_args), &pipeline, &layout);

   STATIC_ASSERT(sizeof(struct anv_accel_struct_header) == ANV_RT_BVH_HEADER_SIZE);
   STATIC_ASSERT(sizeof(struct anv_instance_leaf) == ANV_RT_INSTANCE_LEAF_SIZE);
   STATIC_ASSERT(sizeof(struct anv_quad_leaf_node) == ANV_RT_QUAD_LEAF_SIZE);
   STATIC_ASSERT(sizeof(struct anv_procedural_leaf_node) == ANV_RT_PROCEDURAL_LEAF_SIZE);
   STATIC_ASSERT(sizeof(struct anv_internal_node) == ANV_RT_INTERNAL_NODE_SIZE);

   struct bvh_layout bvh_layout;
   get_bvh_layout(geometry_type, leaf_count, &bvh_layout);

   const struct encode_args args = {
      .intermediate_bvh = intermediate_as_addr,
      .output_bvh = vk_acceleration_structure_get_va(dst) +
                    bvh_layout.bvh_offset,
      .header = intermediate_header_addr,
      .output_bvh_offset = bvh_layout.bvh_offset,
      .leaf_node_count = leaf_count,
      .geometry_type = geometry_type,
   };

   VkPushConstantsInfoKHR push_info = {
      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
      .layout = layout,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(args),
      .pValues = &args,
   };

   anv_CmdPushConstants2KHR(commandBuffer, &push_info);

   struct anv_address indirect_addr =
      anv_address_from_u64(intermediate_header_addr +
                           offsetof(struct vk_ir_header, ir_internal_node_count));
   anv_genX(cmd_buffer->device->info, cmd_buffer_dispatch_indirect)
      (cmd_buffer, indirect_addr, true /* is_unaligned_size_x */);
}

static uint32_t
anv_get_header_key(VkAccelerationStructureTypeKHR type,
                   VkBuildAccelerationStructureFlagBitsKHR flags)
{
   return (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) ?
          1 : 0;
}

static VkResult
anv_init_header_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   if (key == 1) {
      VkPipeline pipeline;
      VkPipelineLayout layout;
      VkResult result = get_pipeline_spv(cmd_buffer->device, "header",
                                         header_spv, sizeof(header_spv),
                                         sizeof(struct header_args), &pipeline,
                                         &layout);
      if (result != VK_SUCCESS)
         return result;

      anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                          pipeline);
   }

   return VK_SUCCESS;
}

static void
anv_init_header(VkCommandBuffer commandBuffer,
                const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
                const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
                VkDeviceAddress intermediate_as_addr,
                VkDeviceAddress intermediate_header_addr, uint32_t leaf_count,
                uint32_t key,
                struct vk_acceleration_structure *dst)
{
   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_device *device = cmd_buffer->device;

   VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);

   struct bvh_layout bvh_layout;
   get_bvh_layout(geometry_type, leaf_count, &bvh_layout);

   VkDeviceAddress header_addr = vk_acceleration_structure_get_va(dst);

   UNUSED size_t base = offsetof(struct anv_accel_struct_header,
                                 copy_dispatch_size);

   uint32_t instance_count = geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ?
                             leaf_count : 0;

   if (key == 1) {
      /* Add a barrier to ensure the writes from encode.comp are ready to be
       * read by header.comp
       */
      vk_barrier_compute_w_to_compute_r(commandBuffer);

      /* VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR is set, so we
       * want to populate header.compacted_size with the compacted size, which
       * needs to be calculated by using ir_header.dst_node_offset, which we'll
       * access in the header.comp.
       */
      base = offsetof(struct anv_accel_struct_header, instance_count);

      VkPipeline pipeline;
      VkPipelineLayout layout;
      get_pipeline_spv(device, "header", header_spv, sizeof(header_spv),
                       sizeof(struct header_args), &pipeline, &layout);

      struct header_args args = {
         .src = intermediate_header_addr,
         .dst = vk_acceleration_structure_get_va(dst),
         .bvh_offset = bvh_layout.bvh_offset,
         .instance_count = instance_count,
      };

      VkPushConstantsInfoKHR push_info = {
         .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
         .layout = layout,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .offset = 0,
         .size = sizeof(args),
         .pValues = &args,
      };

      anv_CmdPushConstants2KHR(commandBuffer, &push_info);
      vk_common_CmdDispatch(commandBuffer, 1, 1, 1);
   } else {
      vk_barrier_compute_w_to_host_r(commandBuffer);

      struct anv_accel_struct_header header = {};

      header.instance_count = instance_count;
      header.self_ptr = header_addr;
      header.compacted_size = bvh_layout.size;

      /* 128 is local_size_x in the copy.comp shader, 8 is the amount of data
       * copied by each iteration of that shader's loop
       */
      header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size,
                                                  8 * 128);
      header.copy_dispatch_size[1] = 1;
      header.copy_dispatch_size[2] = 1;

      header.serialization_size =
         header.compacted_size +
         sizeof(struct vk_accel_struct_serialization_header) +
         sizeof(uint64_t) * header.instance_count;

      header.size = header.compacted_size;

      size_t header_size = sizeof(struct anv_accel_struct_header) - base;
      assert(base % sizeof(uint32_t) == 0);
      assert(header_size % sizeof(uint32_t) == 0);
      uint32_t *header_ptr = (uint32_t *)((char *)&header + base);

      struct anv_address addr = anv_address_from_u64(header_addr + base);
      anv_cmd_buffer_update_addr(cmd_buffer, addr, 0, header_size,
                                 header_ptr, false);
   }
}

static const struct vk_acceleration_structure_build_ops anv_build_ops = {
   .get_as_size = anv_get_as_size,
   .get_encode_key = { anv_get_encode_key, anv_get_header_key },
   .encode_bind_pipeline = { anv_encode_bind_pipeline,
                             anv_init_header_bind_pipeline },
   .encode_as = { anv_encode_as, anv_init_header },
};

static VkResult
anv_device_init_accel_struct_build_state(struct anv_device *device)
{
   VkResult result = VK_SUCCESS;
   simple_mtx_lock(&device->accel_struct_build.mutex);

   if (device->accel_struct_build.radix_sort)
      goto exit;

   const struct radix_sort_vk_target_config radix_sort_config = {
      .keyval_dwords = 2,
      .init = { .workgroup_size_log2 = 8, },
      .fill = { .workgroup_size_log2 = 8, .block_rows = 8 },
      .histogram = {
         .workgroup_size_log2 = 8,
         .subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3,
         .block_rows = 14,
      },
      .prefix = {
         .workgroup_size_log2 = 8,
         .subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3,
      },
      .scatter = {
         .workgroup_size_log2 = 8,
         .subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3,
         .block_rows = 14,
      },
   };

   device->accel_struct_build.radix_sort =
      vk_create_radix_sort_u64(anv_device_to_handle(device),
                               &device->vk.alloc,
                               VK_NULL_HANDLE, radix_sort_config);

   device->vk.as_build_ops = &anv_build_ops;
   device->vk.write_buffer_cp = anv_cmd_write_buffer_cp;
   device->vk.flush_buffer_write_cp = anv_cmd_flush_buffer_write_cp;
   device->vk.cmd_dispatch_unaligned = anv_cmd_dispatch_unaligned;
   device->vk.cmd_fill_buffer_addr = anv_cmd_fill_buffer_addr;

   device->accel_struct_build.build_args =
      (struct vk_acceleration_structure_build_args) {
         .subgroup_size = device->info->ver >= 20 ? 16 : 8,
         .radix_sort = device->accel_struct_build.radix_sort,
         /* See struct anv_accel_struct_header from anv_bvh.h
          *
          * The root pointer starts at offset 0 and the bounding box starts
          * at offset 8.
          */
         .bvh_bounds_offset = 8,
      };

 exit:
   simple_mtx_unlock(&device->accel_struct_build.mutex);
   return result;
}

void
genX(GetAccelerationStructureBuildSizesKHR)(
   VkDevice _device,
@@ -45,6 +428,12 @@ genX(GetAccelerationStructureBuildSizesKHR)(
   const uint32_t* pMaxPrimitiveCounts,
   VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   if (anv_device_init_accel_struct_build_state(device) != VK_SUCCESS)
      return;

   vk_get_as_build_sizes(_device, buildType, pBuildInfo, pMaxPrimitiveCounts,
                         pSizeInfo, &device->accel_struct_build.build_args);
}

void
@@ -53,6 +442,18 @@ genX(GetDeviceAccelerationStructureCompatibilityKHR)(
   const VkAccelerationStructureVersionInfoKHR* pVersionInfo,
   VkAccelerationStructureCompatibilityKHR* pCompatibility)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct vk_accel_struct_serialization_header* ser_header =
      (struct vk_accel_struct_serialization_header*)(pVersionInfo->pVersionData);

   if (memcmp(ser_header->accel_struct_compat,
              device->physical->rt_uuid,
              sizeof(device->physical->rt_uuid)) == 0) {
      *pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR;
   } else {
      *pCompatibility =
         VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
   }
}

void
@@ -62,6 +463,27 @@ genX(CmdBuildAccelerationStructuresKHR)(
   const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
   const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_device *device = cmd_buffer->device;

   VkResult result = anv_device_init_accel_struct_build_state(device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);

   vk_cmd_build_acceleration_structures(commandBuffer, &device->vk,
                                        &device->meta_device, infoCount,
                                        pInfos, ppBuildRangeInfos,
                                        &device->accel_struct_build.build_args);

   anv_cmd_buffer_restore_state(cmd_buffer, &saved);
}

void
@@ -81,6 +503,61 @@ genX(CmdCopyAccelerationStructureKHR)(
   VkCommandBuffer commandBuffer,
   const VkCopyAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result = get_pipeline_spv(cmd_buffer->device, "copy", copy_spv,
                                      sizeof(copy_spv), sizeof(struct copy_args),
                                      &pipeline, &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);

   anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                       pipeline);

   struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = ANV_COPY_MODE_COPY,
   };

   VkPushConstantsInfoKHR push_info = {
      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
      .layout = layout,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(consts),
      .pValues = &consts,
   };

   anv_CmdPushConstants2KHR(commandBuffer, &push_info);

   /* L1/L2 cache flushes should have been dealt with by pipeline barriers.
    * Unfortunately some platforms require an L3 flush because the CS (reading
    * the dispatch parameters) is not L3 coherent.
    */
   if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) {
      anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_DATA_CACHE_FLUSH_BIT,
                                "bvh size read for dispatch");
   }

   anv_genX(cmd_buffer->device->info, CmdDispatchIndirect)(
      commandBuffer, src->buffer,
      src->offset + offsetof(struct anv_accel_struct_header,
                             copy_dispatch_size));

   anv_cmd_buffer_restore_state(cmd_buffer, &saved);
}

void
@@ -88,6 +565,66 @@ genX(CmdCopyAccelerationStructureToMemoryKHR)(
   VkCommandBuffer commandBuffer,
   const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);

   struct anv_device *device = cmd_buffer->device;
   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result = get_pipeline_spv(device, "copy", copy_spv,
                                      sizeof(copy_spv),
                                      sizeof(struct copy_args), &pipeline,
                                      &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);

   anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                       pipeline);

   struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = pInfo->dst.deviceAddress,
      .mode = ANV_COPY_MODE_SERIALIZE,
   };

   memcpy(consts.driver_uuid, device->physical->driver_uuid, VK_UUID_SIZE);
   memcpy(consts.accel_struct_compat, device->physical->rt_uuid, VK_UUID_SIZE);

   VkPushConstantsInfoKHR push_info = {
      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
      .layout = layout,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(consts),
      .pValues = &consts,
   };

   anv_CmdPushConstants2KHR(commandBuffer, &push_info);

   /* L1/L2 cache flushes should have been dealt with by pipeline barriers.
    * Unfortunately some platforms require an L3 flush because the CS (reading
    * the dispatch parameters) is not L3 coherent.
    */
   if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) {
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_DATA_CACHE_FLUSH_BIT,
                                "bvh size read for dispatch");
   }

   anv_genX(device->info, CmdDispatchIndirect)(
      commandBuffer, src->buffer,
      src->offset + offsetof(struct anv_accel_struct_header,
                             copy_dispatch_size));

   anv_cmd_buffer_restore_state(cmd_buffer, &saved);
}

void
@@ -95,6 +632,48 @@ genX(CmdCopyMemoryToAccelerationStructureKHR)(
   VkCommandBuffer commandBuffer,
   const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result = get_pipeline_spv(cmd_buffer->device, "copy", copy_spv,
                                      sizeof(copy_spv),
                                      sizeof(struct copy_args), &pipeline,
                                      &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);

   anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                       pipeline);

   const struct copy_args consts = {
      .src_addr = pInfo->src.deviceAddress,
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = ANV_COPY_MODE_DESERIALIZE,
   };

   VkPushConstantsInfoKHR push_info = {
      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
      .layout = layout,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(consts),
      .pValues = &consts,
   };

   anv_CmdPushConstants2KHR(commandBuffer, &push_info);

   vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
   anv_cmd_buffer_restore_state(cmd_buffer, &saved);
}

/* TODO: Host commands */
@@ -31,7 +31,7 @@

#include "vk_standard_sample_locations.h"

#if GFX_VERx10 >= 125 && ANV_SUPPORT_RT
#if GFX_VERx10 >= 125 && ANV_SUPPORT_RT_GRL
#include "grl/genX_grl.h"
#endif

||||
@@ -863,9 +863,15 @@ void
|
||||
genX(init_physical_device_state)(ASSERTED struct anv_physical_device *pdevice)
|
||||
{
|
||||
assert(pdevice->info.verx10 == GFX_VERx10);
|
||||
|
||||
#if GFX_VERx10 >= 125 && ANV_SUPPORT_RT
|
||||
#if ANV_SUPPORT_RT_GRL
|
||||
genX(grl_load_rt_uuid)(pdevice->rt_uuid);
|
||||
pdevice->max_grl_scratch_size = genX(grl_max_scratch_size)();
|
||||
#else
|
||||
STATIC_ASSERT(sizeof(ANV_RT_UUID_MACRO) == VK_UUID_SIZE);
|
||||
memcpy(pdevice->rt_uuid, ANV_RT_UUID_MACRO, VK_UUID_SIZE);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
pdevice->cmd_emit_timestamp = genX(cmd_emit_timestamp);
|
||||
|
@@ -1052,19 +1052,19 @@ emit_perf_intel_query(struct anv_cmd_buffer *cmd_buffer,
   }
}

static void
emit_query_clear_flush(struct anv_cmd_buffer *cmd_buffer,
                       struct anv_query_pool *pool,
                       const char *reason)
static bool
append_query_clear_flush(struct anv_cmd_buffer *cmd_buffer,
                         struct anv_query_pool *pool,
                         const char *reason)
{
   if (cmd_buffer->state.queries.clear_bits == 0)
      return;
      return false;

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_QUERY_BITS(
                                cmd_buffer->state.queries.clear_bits),
                             reason);
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
   return true;
}


@@ -1079,7 +1079,9 @@ void genX(CmdBeginQueryIndexedEXT)(
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   struct anv_address query_addr = anv_query_address(pool, query);

   emit_query_clear_flush(cmd_buffer, pool, "CmdBeginQuery* flush query clears");
   if (append_query_clear_flush(cmd_buffer, pool,
                                "CmdBeginQuery* flush query clears"))
      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   struct mi_builder b;
   mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
@@ -1532,8 +1534,9 @@ void genX(CmdWriteTimestamp2)(

   assert(pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP);

   emit_query_clear_flush(cmd_buffer, pool,
                          "CmdWriteTimestamp flush query clears");
   if (append_query_clear_flush(cmd_buffer, pool,
                                "CmdWriteTimestamp flush query clears"))
      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   struct mi_builder b;
   mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
@@ -2014,11 +2017,15 @@ void genX(CmdCopyQueryPoolResults)(

#if GFX_VERx10 >= 125 && ANV_SUPPORT_RT

#if ANV_SUPPORT_RT_GRL
#include "grl/include/GRLRTASCommon.h"
#include "grl/grl_metakernel_postbuild_info.h"
#else
#include "bvh/anv_bvh.h"
#endif

static void
anv_write_acceleration_structure_properties_grl(
void
genX(CmdWriteAccelerationStructuresPropertiesKHR)(
   VkCommandBuffer commandBuffer,
   uint32_t accelerationStructureCount,
   const VkAccelerationStructureKHR* pAccelerationStructures,
@@ -2026,15 +2033,31 @@ anv_write_acceleration_structure_properties_grl(
   VkQueryPool queryPool,
   uint32_t firstQuery)
{
   assert(queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
          queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
          queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
          queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR);

   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   emit_query_clear_flush(cmd_buffer, pool,
                          "CmdWriteAccelerationStructuresPropertiesKHR flush query clears");
#if !ANV_SUPPORT_RT_GRL
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT |
                             ANV_PIPE_DATA_CACHE_FLUSH_BIT,
                             "read BVH data using CS");
#endif

   if (append_query_clear_flush(
          cmd_buffer, pool,
          "CmdWriteAccelerationStructuresPropertiesKHR flush query clears") ||
       !ANV_SUPPORT_RT_GRL)
      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   struct mi_builder b;
   mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);

#if ANV_SUPPORT_RT_GRL
   for (uint32_t i = 0; i < accelerationStructureCount; i++) {
      ANV_FROM_HANDLE(vk_acceleration_structure, accel, pAccelerationStructures[i]);
      struct anv_address query_addr =
@@ -2076,26 +2099,47 @@ anv_write_acceleration_structure_properties_grl(

   for (uint32_t i = 0; i < accelerationStructureCount; i++)
      emit_query_mi_availability(&b, anv_query_address(pool, firstQuery + i), true);
}

void
genX(CmdWriteAccelerationStructuresPropertiesKHR)(
   VkCommandBuffer commandBuffer,
   uint32_t accelerationStructureCount,
   const VkAccelerationStructureKHR* pAccelerationStructures,
   VkQueryType queryType,
   VkQueryPool queryPool,
   uint32_t firstQuery)
{
   assert(queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
          queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
          queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
          queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR);
#else
   for (uint32_t i = 0; i < accelerationStructureCount; i++) {
      ANV_FROM_HANDLE(vk_acceleration_structure, accel, pAccelerationStructures[i]);
      struct anv_address query_addr =
         anv_address_add(anv_query_address(pool, firstQuery + i), 8);
      uint64_t va = vk_acceleration_structure_get_va(accel);

      anv_write_acceleration_structure_properties_grl(commandBuffer,
                                                      accelerationStructureCount,
                                                      pAccelerationStructures,
                                                      queryType, queryPool,
                                                      firstQuery);
      mi_builder_set_write_check(&b, (i == (accelerationStructureCount - 1)));

      switch (queryType) {
      case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
         va += offsetof(struct anv_accel_struct_header, compacted_size);
         break;
      case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
         va += offsetof(struct anv_accel_struct_header, size);
         break;
      case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
         va += offsetof(struct anv_accel_struct_header, serialization_size);
         break;
      case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
         va += offsetof(struct anv_accel_struct_header, instance_count);
         /* To respect the current setup tailored for GRL, the numBlasPtrs are
          * stored at the second slot (third slot, if you count availability)
          */
         query_addr = anv_address_add(query_addr, 8);
         break;
      default:
         unreachable("unhandled query type");
      }

      mi_store(&b, mi_mem64(query_addr), mi_mem64(anv_address_from_u64(va)));
   }

   struct mi_builder b1;
   mi_builder_init(&b1, cmd_buffer->device->info, &cmd_buffer->batch);

   for (uint32_t i = 0; i < accelerationStructureCount; i++) {
      mi_builder_set_write_check(&b1, (i == (accelerationStructureCount - 1)));
      emit_query_mi_availability(&b1, anv_query_address(pool, firstQuery + i), true);
   }
#endif /* ANV_SUPPORT_RT_GRL */
}
#endif
#endif /* GFX_VERx10 >= 125 && ANV_SUPPORT_RT */
@@ -38,13 +38,22 @@ idep_anv_headers = declare_dependency(
)

if with_intel_vk_rt
  subdir('grl')
  optional_libgrl = [libgrl]
  if with_intel_bvh_grl
    subdir('grl')
    optional_libgrl = [libgrl]
    anv_flags += '-DANV_SUPPORT_RT_GRL=1'
  else
    subdir('bvh')
    idep_grl = null_dep
    optional_libgrl = []
    anv_flags += '-DANV_SUPPORT_RT_GRL=0'
  endif
  anv_flags += '-DANV_SUPPORT_RT=1'
else
  idep_grl = null_dep
  optional_libgrl = []
  anv_flags += '-DANV_SUPPORT_RT=0'
  anv_flags += '-DANV_SUPPORT_RT_GRL=0'
endif

intel_icd = custom_target(
@@ -101,9 +110,15 @@ anv_per_hw_ver_files = files(
  'genX_simple_shader.c',
)
if with_intel_vk_rt
  anv_per_hw_ver_files += files(
    'genX_acceleration_structure_grl.c',
  )
  if with_intel_bvh_grl
    anv_per_hw_ver_files += files(
      'genX_acceleration_structure_grl.c',
    )
  else
    anv_per_hw_ver_files += files(
      'genX_acceleration_structure.c',
    )
  endif
endif

foreach _gfx_ver : ['90', '110', '120', '125', '200', '300']
@@ -194,6 +209,8 @@ libanv_files = files(
  'anv_va.c',
  'anv_video.c',
  'anv_wsi.c',
  'bvh/anv_bvh.h',
  'bvh/anv_build_interface.h',
)

anv_deps = [
@@ -248,7 +265,7 @@ libanv_common = static_library(

libvulkan_intel = shared_library(
  'vulkan_intel',
  [files('anv_gem.c'), anv_entrypoints[0]],
  [files('anv_gem.c'), anv_entrypoints[0], bvh_spv],
  include_directories : [
    inc_include, inc_src, inc_intel,
  ],