
/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include <math.h>

#include "util/u_debug.h"
#include "util/half_float.h"
#include "util/u_atomic.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"

#if GFX_VERx10 >= 125

#include "grl/grl_structs.h"

/* Wait for the previous dispatches to finish and flush their data port
 * writes.
 */
#define ANV_GRL_FLUSH_FLAGS (ANV_PIPE_END_OF_PIPE_SYNC_BIT | \
                             ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
                             ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT)
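
/* The Vulkan API hands us the geometry descriptions either as a flat array
 * (pGeometries) or as an array of pointers (ppGeometries); exactly one of
 * the two is non-NULL.
 */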
static const VkAccelerationStructureGeometryKHR *
get_geometry(const VkAccelerationStructureBuildGeometryInfoKHR *pInfo,
             uint32_t index)
{
   return pInfo->pGeometries ? &pInfo->pGeometries[index] :
                               pInfo->ppGeometries[index];
}

static size_t align_transient_size(size_t bytes)
{
   return ALIGN(bytes, 64);
}

static size_t align_private_size(size_t bytes)
{
   return ALIGN(bytes, 64);
}

static size_t get_scheduler_size(size_t num_builds)
{
   size_t scheduler_size = sizeof(union SchedulerUnion);
   /* add more memory for qnode creation stage if needed */
   if (num_builds > QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM) {
      scheduler_size += (num_builds - QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM) * 2 *
                        sizeof(struct QNodeGlobalRootBufferEntry);
   }

   return align_private_size(scheduler_size);
}

static size_t
get_batched_binnedsah_transient_mem_size(size_t num_builds)
{
   if (num_builds == 0)
      return 0;
   return num_builds * (sizeof(struct SAHBuildBuffersInfo) + sizeof(gpuva_t));
}

static size_t
get_batched_binnedsah_private_mem_size(size_t num_builds)
{
   if (num_builds == 0)
      return 0;

   size_t globals_size = align_private_size(num_builds * sizeof(struct SAHBuildGlobals));
   return globals_size + get_scheduler_size(num_builds);
}

static uint32_t
estimate_qbvh6_nodes(const uint32_t N)
{
   const uint32_t W = 6;
   const uint32_t N0 = N / 2 + N % 2; // lowest level with 2 leaves per QBVH6 node
   const uint32_t N1 = N0 / W + (N0 % W ? 1 : 0); // filled level
   const uint32_t N2 = N0 / W + (N1 % W ? 1 : 0); // filled level
   const uint32_t N3 = N0 / W + (N2 % W ? 1 : 0); // filled level
   const uint32_t N4 = N3; // overestimate remaining nodes
   return N0 + N1 + N2 + N3 + N4;
}

/* Estimates the worst case number of QBVH6 nodes for a top-down BVH
 * build that guarantees to produce subtrees with N >= K primitives
 * from which a single QBVH6 node is created.
 */
static uint32_t
estimate_qbvh6_nodes_minK(const uint32_t N, uint32_t K)
{
   const uint32_t N0 = N / K + (N % K ? 1 : 0); // lowest level of nodes with K leaves minimally
   return N0 + estimate_qbvh6_nodes(N0);
}

static size_t
estimate_qbvh6_fatleafs(const size_t P)
{
   return P;
}
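
/* For example, P = 100 fat-leaves gives 100 + ceil(100 / 5) = 120 nodes in
 * the worst case computed below.
 */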
static size_t
estimate_qbvh6_nodes_worstcase(const size_t P)
{
   const size_t F = estimate_qbvh6_fatleafs(P);

   // worst-case each inner node having 5 fat-leaf children.
   // number of inner nodes is F/5 and number of fat-leaves is F
   return F + ceil(F/5.0);
}
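
/* Byte sizes of the structures the hardware and the GRL kernels consume.
 * The GENX(*_length) values from genxml are in dwords, hence the "* 4".
 */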
#define sizeof_PrimRef        32
#define sizeof_HwInstanceLeaf (GENX(RT_BVH_INSTANCE_LEAF_length) * 4)
#define sizeof_InternalNode   (GENX(RT_BVH_INTERNAL_NODE_length) * 4)
#define sizeof_Procedural     (GENX(RT_BVH_PROCEDURAL_LEAF_length) * 4)
#define sizeof_Quad           (GENX(RT_BVH_QUAD_LEAF_length) * 4)
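
/* Compute a worst-case size estimate and the internal layout (offsets of
 * the node, leaf and metadata sections) of the destination acceleration
 * structure.
 */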
static struct MKSizeEstimate
get_gpu_size_estimate(const VkAccelerationStructureBuildGeometryInfoKHR *pInfo,
                      const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos,
                      const uint32_t *pMaxPrimitiveCounts)
{
   uint32_t num_triangles = 0, num_aabbs = 0, num_instances = 0;
   for (unsigned g = 0; g < pInfo->geometryCount; g++) {
      const VkAccelerationStructureGeometryKHR *pGeometry =
         get_geometry(pInfo, g);
      uint32_t prim_count = pBuildRangeInfos != NULL ?
         pBuildRangeInfos[g].primitiveCount : pMaxPrimitiveCounts[g];

      switch (pGeometry->geometryType) {
      case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
         num_triangles += prim_count;
         break;
      case VK_GEOMETRY_TYPE_AABBS_KHR:
         num_aabbs += prim_count;
         break;
      case VK_GEOMETRY_TYPE_INSTANCES_KHR:
         num_instances += prim_count;
         break;
      default:
         unreachable("Unsupported geometry type");
      }
   }
   const uint32_t num_primitives = num_triangles + num_aabbs + num_instances;

   struct MKSizeEstimate est = {};

   uint64_t size = sizeof(BVHBase);
   size = align64(size, 64);

   /* Must immediately follow BVHBase because we use fixed offset to nodes. */
   est.node_data_start = size;

   switch (pInfo->type) {
   case VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR: {
      assert(num_triangles == 0 && num_aabbs == 0);

      est.numPrimitives = num_instances;
      est.numPrimitivesToSplit = 0;
      est.numBuildPrimitives = est.numPrimitives + est.numPrimitivesToSplit;

      est.min_primitives = est.numPrimitives;
      est.max_primitives = est.numPrimitives + est.numPrimitivesToSplit;

      unsigned int sizeInnerNodes =
         (unsigned int) estimate_qbvh6_nodes_worstcase(est.numBuildPrimitives) *
         sizeof_InternalNode;
      if (sizeInnerNodes == 0)
         sizeInnerNodes = sizeof_InternalNode;

      est.max_inner_nodes = sizeInnerNodes / sizeof_InternalNode;

      size += sizeInnerNodes;
      STATIC_ASSERT(sizeof_InternalNode % 64 == 0);

      est.leaf_data_start = size;
      size += est.numBuildPrimitives * sizeof_HwInstanceLeaf;
      STATIC_ASSERT(sizeof_HwInstanceLeaf % 64 == 0);

      est.leaf_data_size = est.numBuildPrimitives * sizeof_HwInstanceLeaf;

      break;
   }

   case VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR: {
      assert(num_instances == 0);

      /* RT: TODO */
      const float split_factor = 0.0f;
      uint32_t num_prims_to_split = 0;
      if (false)
         num_prims_to_split = num_triangles * (double)split_factor;

      const uint32_t num_build_triangles = num_triangles + num_prims_to_split;
      const uint32_t num_build_primitives = num_build_triangles + num_aabbs;

      est.numPrimitives = num_primitives;
      est.numTriangles = num_triangles;
      est.numProcedurals = num_aabbs;
      est.numMeshes = pInfo->geometryCount;
      est.numBuildPrimitives = num_build_primitives;
      est.numPrimitivesToSplit = num_prims_to_split;
      est.max_instance_leafs = 0;

      est.min_primitives = (size_t)(num_build_triangles * 0.5f + num_aabbs);
      est.max_primitives = num_build_triangles + num_aabbs;

      size_t nodeBytes = 0;
      nodeBytes += estimate_qbvh6_nodes_worstcase(num_build_triangles) * sizeof_InternalNode;
      nodeBytes += estimate_qbvh6_nodes_worstcase(num_aabbs) * sizeof_InternalNode;
      if (nodeBytes == 0) // for case with 0 primitives
         nodeBytes = sizeof_InternalNode;
      nodeBytes = MAX2(nodeBytes, 8 * (size_t)num_build_primitives); // for primref_index0/1 buffers

      est.max_inner_nodes = nodeBytes / sizeof_InternalNode;

      size += nodeBytes;
      STATIC_ASSERT(sizeof_InternalNode % 64 == 0);

      est.leaf_data_start = size;
      size += num_build_triangles * sizeof_Quad;
      STATIC_ASSERT(sizeof_Quad % 64 == 0);

      est.procedural_data_start = size;
      size += num_aabbs * sizeof_Procedural;
      STATIC_ASSERT(sizeof_Procedural % 64 == 0);

      est.leaf_data_size = num_build_triangles * sizeof_Quad +
                           num_aabbs * sizeof_Procedural;

      if (num_build_primitives == 0)
         size += MAX2(sizeof_Quad, sizeof_Procedural);
      break;
   }

   default:
      unreachable("Unsupported acceleration structure type");
   }

   size = align64(size, 64);
   est.instance_descs_start = size;
   size += sizeof(struct InstanceDesc) * num_instances;

   est.geo_meta_data_start = size;
   size += sizeof(struct GeoMetaData) * pInfo->geometryCount;
   size = align64(size, 64);

   assert(size == align64(size, 64));
   est.back_pointer_start = size;

   const bool alloc_backpointers = false; /* RT TODO */
   if (alloc_backpointers) {
      size += est.max_inner_nodes * sizeof(uint32_t);
      size = align64(size, 64);
   }

   assert(size < UINT32_MAX);
   est.sizeTotal = align64(size, 64);

   return est;
}
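
/* Layout of the per-build scratch buffer handed to us through
 * VkAccelerationStructureBuildGeometryInfoKHR::scratchData: build globals
 * first, then primrefs, then the leaf index buffers, then (for the new SAH
 * builder) the BVH2 and qnode scratch.
 */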
struct scratch_layout {
   gpuva_t base;
   uint32_t total_size;

   gpuva_t primrefs;
   gpuva_t globals;
   gpuva_t leaf_index_buffers;
   uint32_t leaf_index_buffer_stride;

   /* new_sah */
   gpuva_t qnode_buffer;
   gpuva_t bvh2_buffer;
};
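
/* A BVH2 over N primitives is a full binary tree with N leaves and hence
 * N - 1 internal nodes, i.e. 2 * N - 1 nodes in total.
 */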
static size_t
get_bvh2_size(uint32_t num_primitives)
{
   if (num_primitives == 0)
      return 0;
   return sizeof(struct BVH2) +
          (2 * num_primitives - 1) * sizeof(struct BVH2Node);
}
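
/* All pointers handed to the GRL kernels must be in canonical form.
 * anv_address_physical() already returns a canonical address, while the
 * offsets we add on top of it go through intel_canonical_address(), which
 * sign-extends the upper bits.
 */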
static struct scratch_layout
get_gpu_scratch_layout(struct anv_address base,
                       struct MKSizeEstimate est,
                       enum anv_rt_bvh_build_method build_method)
{
   struct scratch_layout scratch = {
      .base = anv_address_physical(base),
   };
   gpuva_t current = anv_address_physical(base);

   scratch.globals = current;
   current += sizeof(struct Globals);

   scratch.primrefs = intel_canonical_address(current);
   current += est.numBuildPrimitives * sizeof_PrimRef;

   scratch.leaf_index_buffers = intel_canonical_address(current);
   current += est.numBuildPrimitives * sizeof(uint32_t) * 2;
   scratch.leaf_index_buffer_stride = sizeof(uint32_t);

   switch (build_method) {
   case ANV_BVH_BUILD_METHOD_TRIVIAL:
      break;

   case ANV_BVH_BUILD_METHOD_NEW_SAH: {
      size_t bvh2_size = get_bvh2_size(est.numBuildPrimitives);
      if (est.leaf_data_size < bvh2_size) {
         scratch.bvh2_buffer = intel_canonical_address(current);
         current += bvh2_size;
      }

      scratch.qnode_buffer = intel_canonical_address(current);
      current += 2 * sizeof(dword) * est.max_inner_nodes;
      break;
   }

   default:
      unreachable("invalid build");
   }

   assert((current - scratch.base) < UINT32_MAX);
   scratch.total_size = current - scratch.base;

   return scratch;
}

static void
anv_get_gpu_acceleration_structure_size(
   struct anv_device                           *device,
   VkAccelerationStructureBuildTypeKHR          buildType,
   const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo,
   const uint32_t*                              pMaxPrimitiveCounts,
   VkAccelerationStructureBuildSizesInfoKHR*    pSizeInfo)
{
   struct MKSizeEstimate est = get_gpu_size_estimate(pBuildInfo, NULL,
                                                     pMaxPrimitiveCounts);
   struct scratch_layout scratch = get_gpu_scratch_layout(ANV_NULL_ADDRESS, est,
                                                          device->bvh_build_method);

   pSizeInfo->accelerationStructureSize = est.sizeTotal;
   pSizeInfo->buildScratchSize = scratch.total_size;
   pSizeInfo->updateScratchSize = scratch.total_size; /* TODO */
}

void
genX(GetAccelerationStructureBuildSizesKHR)(
    VkDevice                                    _device,
    VkAccelerationStructureBuildTypeKHR         buildType,
    const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo,
    const uint32_t*                             pMaxPrimitiveCounts,
    VkAccelerationStructureBuildSizesInfoKHR*   pSizeInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   assert(pSizeInfo->sType ==
          VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR);

   VkAccelerationStructureBuildSizesInfoKHR gpu_size_info;
   anv_get_gpu_acceleration_structure_size(device, buildType, pBuildInfo,
                                           pMaxPrimitiveCounts,
                                           &gpu_size_info);

   pSizeInfo->accelerationStructureSize =
      gpu_size_info.accelerationStructureSize;
   pSizeInfo->buildScratchSize = gpu_size_info.buildScratchSize;
   pSizeInfo->updateScratchSize = gpu_size_info.updateScratchSize;
}

void
genX(GetDeviceAccelerationStructureCompatibilityKHR)(
    VkDevice                                    _device,
    const VkAccelerationStructureVersionInfoKHR* pVersionInfo,
    VkAccelerationStructureCompatibilityKHR*    pCompatibility)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (memcmp(pVersionInfo->pVersionData,
              device->physical->rt_uuid,
              sizeof(device->physical->rt_uuid)) == 0) {
      *pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR;
   } else {
      *pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
   }
}

static inline uint8_t
vk_to_grl_GeometryFlags(VkGeometryFlagsKHR flags)
{
   uint8_t grl_flags = GEOMETRY_FLAG_NONE;
   unsigned mask = flags;
   while (mask) {
      int i = u_bit_scan(&mask);
      switch ((VkGeometryFlagBitsKHR)(1u << i)) {
      case VK_GEOMETRY_OPAQUE_BIT_KHR:
         grl_flags |= GEOMETRY_FLAG_OPAQUE;
         break;
      case VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR:
         grl_flags |= GEOMETRY_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION;
         break;
      default:
         unreachable("Unsupported geometry flag");
      }
   }
   return grl_flags;
}

static inline IndexFormat
vk_to_grl_IndexFormat(VkIndexType type)
{
   switch (type) {
   case VK_INDEX_TYPE_NONE_KHR:  return INDEX_FORMAT_NONE;
   case VK_INDEX_TYPE_UINT8_EXT: unreachable("No UINT8 support yet");
   case VK_INDEX_TYPE_UINT16:    return INDEX_FORMAT_R16_UINT;
   case VK_INDEX_TYPE_UINT32:    return INDEX_FORMAT_R32_UINT;
   default:
      unreachable("Unsupported index type");
   }
}

static inline VertexFormat
vk_to_grl_VertexFormat(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_R32G32_SFLOAT:       return VERTEX_FORMAT_R32G32_FLOAT;
   case VK_FORMAT_R32G32B32_SFLOAT:    return VERTEX_FORMAT_R32G32B32_FLOAT;
   case VK_FORMAT_R16G16_SFLOAT:       return VERTEX_FORMAT_R16G16_FLOAT;
   case VK_FORMAT_R16G16B16A16_SFLOAT: return VERTEX_FORMAT_R16G16B16A16_FLOAT;
   case VK_FORMAT_R16G16_SNORM:        return VERTEX_FORMAT_R16G16_SNORM;
   case VK_FORMAT_R16G16B16A16_SNORM:  return VERTEX_FORMAT_R16G16B16A16_SNORM;
   case VK_FORMAT_R16G16B16A16_UNORM:  return VERTEX_FORMAT_R16G16B16A16_UNORM;
   case VK_FORMAT_R16G16_UNORM:        return VERTEX_FORMAT_R16G16_UNORM;
   /* case VK_FORMAT_R10G10B10A2_UNORM: return VERTEX_FORMAT_R10G10B10A2_UNORM; */
   case VK_FORMAT_R8G8B8A8_UNORM:      return VERTEX_FORMAT_R8G8B8A8_UNORM;
   case VK_FORMAT_R8G8_UNORM:          return VERTEX_FORMAT_R8G8_UNORM;
   case VK_FORMAT_R8G8B8A8_SNORM:      return VERTEX_FORMAT_R8G8B8A8_SNORM;
   case VK_FORMAT_R8G8_SNORM:          return VERTEX_FORMAT_R8G8_SNORM;
   default:
      unreachable("Unsupported vertex format");
   }
}
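
/* Translate a Vulkan geometry description into the GRL "Geo" descriptor the
 * build kernels consume, folding the build-range offsets into the buffer
 * addresses.
 */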
static struct Geo
vk_to_grl_Geo(const VkAccelerationStructureGeometryKHR *pGeometry,
              uint32_t prim_count,
              uint32_t transform_offset,
              uint32_t primitive_offset,
              uint32_t first_vertex)
{
   struct Geo geo = {
      .Flags = vk_to_grl_GeometryFlags(pGeometry->flags),
   };

   switch (pGeometry->geometryType) {
   case VK_GEOMETRY_TYPE_TRIANGLES_KHR: {
      const VkAccelerationStructureGeometryTrianglesDataKHR *vk_tri =
         &pGeometry->geometry.triangles;

      geo.Type = GEOMETRY_TYPE_TRIANGLES;

      geo.Desc.Triangles.pTransformBuffer =
         vk_tri->transformData.deviceAddress;
      geo.Desc.Triangles.pIndexBuffer =
         vk_tri->indexData.deviceAddress;
      geo.Desc.Triangles.pVertexBuffer =
         vk_tri->vertexData.deviceAddress;
      geo.Desc.Triangles.VertexBufferByteStride = vk_tri->vertexStride;

      if (geo.Desc.Triangles.pTransformBuffer)
         geo.Desc.Triangles.pTransformBuffer += transform_offset;

      if (vk_tri->indexType == VK_INDEX_TYPE_NONE_KHR) {
         geo.Desc.Triangles.IndexCount = 0;
         geo.Desc.Triangles.VertexCount = prim_count * 3;
         geo.Desc.Triangles.IndexFormat = INDEX_FORMAT_NONE;
         geo.Desc.Triangles.pVertexBuffer += primitive_offset;
      } else {
         geo.Desc.Triangles.IndexCount = prim_count * 3;
         geo.Desc.Triangles.VertexCount = vk_tri->maxVertex;
         geo.Desc.Triangles.IndexFormat =
            vk_to_grl_IndexFormat(vk_tri->indexType);
         geo.Desc.Triangles.pIndexBuffer += primitive_offset;
      }

      geo.Desc.Triangles.VertexFormat =
         vk_to_grl_VertexFormat(vk_tri->vertexFormat);
      geo.Desc.Triangles.pVertexBuffer += vk_tri->vertexStride * first_vertex;
      break;
   }

   case VK_GEOMETRY_TYPE_AABBS_KHR: {
      const VkAccelerationStructureGeometryAabbsDataKHR *vk_aabbs =
         &pGeometry->geometry.aabbs;
      geo.Type = GEOMETRY_TYPE_PROCEDURAL;
      geo.Desc.Procedural.pAABBs_GPUVA =
         vk_aabbs->data.deviceAddress + primitive_offset;
      geo.Desc.Procedural.AABBByteStride = vk_aabbs->stride;
      geo.Desc.Procedural.AABBCount = prim_count;
      break;
   }

   default:
      unreachable("Invalid geometry type");
   }

   return geo;
}

#include "grl/grl_metakernel_copy.h"
#include "grl/grl_metakernel_misc.h"
#include "grl/grl_metakernel_build_primref.h"
#include "grl/grl_metakernel_new_sah_builder.h"
#include "grl/grl_metakernel_build_leaf.h"
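
/* Per-build bookkeeping gathered in the first pass over the application
 * build infos, then consumed by the successive kernel dispatch rounds below.
 */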
struct build_state {
   enum anv_rt_bvh_build_method build_method;

   struct MKSizeEstimate estimate;
   struct scratch_layout scratch;
   struct MKBuilderState state;

   struct anv_address bvh_addr;

   size_t geom_size_prefix_sum_buffer;
   size_t transient_size;

   uint32_t leaf_type;
   uint32_t leaf_size;

   uint32_t num_geometries;
   uint32_t num_instances;

   uint64_t instances_addr;
   bool array_of_instances_ptr;

   const VkAccelerationStructureGeometryKHR *vk_geoms;
};
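
/* Pick the scratch buffers for the new SAH builder. When the leaf data
 * section of the destination BVH is at least as large as the BVH2 scratch,
 * the BVH2 is built in place of the (not yet written) leaf data instead of
 * dedicated scratch memory.
 */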
static void
get_binnedsah_scratch_buffers(struct build_state *bs,
                              uint64_t *p_qnode_buffer,
                              uint64_t *p_primref_indices,
                              uint64_t *p_bvh2)
{
   if (bs->estimate.numBuildPrimitives == 0) {
      *p_bvh2 = 0;
      *p_qnode_buffer = 0;
      *p_primref_indices = 0;
      return;
   }

   size_t bvh2_size = get_bvh2_size(bs->estimate.numBuildPrimitives);
   if (bs->estimate.leaf_data_size < bvh2_size) {
      assert(bs->scratch.bvh2_buffer != 0);
      *p_bvh2 = bs->scratch.bvh2_buffer;
   } else {
      *p_bvh2 = intel_canonical_address(bs->state.bvh_buffer +
                                        bs->estimate.leaf_data_start);
   }

   assert(bs->scratch.qnode_buffer != 0);
   *p_qnode_buffer = bs->scratch.qnode_buffer;

   assert(bs->scratch.leaf_index_buffers != 0);
   *p_primref_indices = bs->scratch.leaf_index_buffers;
}

static void
write_memory(struct anv_cmd_alloc alloc, size_t offset, const void *data, size_t data_len)
{
   assert((offset + data_len) <= alloc.size);
   memcpy(alloc.map + offset, data, data_len);
}
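
/* GPU path for vkCmdBuildAccelerationStructuresKHR(). The builds are
 * batched: a first CPU pass sizes and lays out all the builds, then the GRL
 * kernels are dispatched in rounds (init globals, copy instance/geometry
 * metadata, emit primrefs, run the trivial or new-SAH builder, and finally
 * write the leaves), with cache flushes between the rounds.
 */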
static void
cmd_build_acceleration_structures(
   struct anv_cmd_buffer *cmd_buffer,
   uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
   const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos,
   const VkDeviceAddress *pIndirectDeviceAddresses,
   const uint32_t *pIndirectStrides,
   const uint32_t *const *ppMaxPrimitiveCounts)
{
   struct anv_device *device = cmd_buffer->device;
   VK_MULTIALLOC(ma);

   struct build_state *builds;
   vk_multialloc_add(&ma, &builds, struct build_state, infoCount);

   if (!vk_multialloc_zalloc(&ma,
                             &cmd_buffer->device->vk.alloc,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) {
      anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   /* TODO: Indirect */
   assert(ppBuildRangeInfos != NULL);

   size_t transient_mem_init_globals_size = 0;
   size_t transient_mem_init_globals_offset = 0;

   size_t transient_total = 0;

   size_t private_mem_total = 0;

   size_t num_trivial_builds = 0;
   size_t num_new_sah_builds = 0;

   /* Prepare a bunch of data for the kernels we have to run. */
   for (uint32_t i = 0; i < infoCount; i++) {
      struct build_state *bs = &builds[i];

      const VkAccelerationStructureBuildGeometryInfoKHR *pInfo = &pInfos[i];
      struct anv_address scratch_addr =
         anv_address_from_u64(pInfo->scratchData.deviceAddress);

      const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos =
         ppBuildRangeInfos ? ppBuildRangeInfos[i] : NULL;
      const uint32_t *pMaxPrimitiveCounts =
         ppMaxPrimitiveCounts ? ppMaxPrimitiveCounts[i] : NULL;

      ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel,
                      pInfo->dstAccelerationStructure);

      bs->build_method = device->bvh_build_method;

      bs->bvh_addr = anv_address_from_u64(vk_acceleration_structure_get_va(dst_accel));

      bs->estimate = get_gpu_size_estimate(pInfo, pBuildRangeInfos,
                                           pMaxPrimitiveCounts);
      bs->scratch = get_gpu_scratch_layout(scratch_addr, bs->estimate,
                                           bs->build_method);

      uint32_t leaf_size, leaf_type;

      switch (pInfo->type) {
      case VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR: {
         assert(pInfo->geometryCount == 1);

         const VkAccelerationStructureGeometryKHR *pGeometry =
            get_geometry(pInfo, 0);
         assert(pGeometry->geometryType == VK_GEOMETRY_TYPE_INSTANCES_KHR);

         const VkAccelerationStructureGeometryInstancesDataKHR *instances =
            &pGeometry->geometry.instances;

         bs->num_instances = pBuildRangeInfos[0].primitiveCount;
         bs->instances_addr = instances->data.deviceAddress;
         bs->array_of_instances_ptr = instances->arrayOfPointers;
         leaf_type = NODE_TYPE_INSTANCE;
         leaf_size = GENX(RT_BVH_INSTANCE_LEAF_length) * 4;
         break;
      }

      case VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR: {
         bs->num_geometries = pInfo->geometryCount;
         leaf_type = NODE_TYPE_QUAD;
         leaf_size = GENX(RT_BVH_QUAD_LEAF_length) * 4;
         break;
      }

      default:
         unreachable("Unsupported acceleration structure type");
      }

      size_t geom_struct_size = bs->num_geometries * sizeof(struct Geo);
      size_t geom_prefix_sum_size = ALIGN(sizeof(uint32_t) * (bs->num_geometries + 1), 64);

      bs->transient_size = geom_prefix_sum_size + geom_struct_size;

      bs->geom_size_prefix_sum_buffer = transient_total + 0;

      bs->state = (struct MKBuilderState) {
         .geomDesc_buffer = bs->geom_size_prefix_sum_buffer +
                            geom_prefix_sum_size,
         .build_primref_buffer = bs->scratch.primrefs,
         .build_globals = bs->scratch.globals,
         .bvh_buffer = anv_address_physical(bs->bvh_addr),
         .leaf_type = leaf_type,
         .leaf_size = leaf_size,
      };

      transient_total += bs->transient_size;

      switch (device->bvh_build_method) {
      case ANV_BVH_BUILD_METHOD_TRIVIAL:
         num_trivial_builds++;
         break;
      case ANV_BVH_BUILD_METHOD_NEW_SAH:
         num_new_sah_builds++;
         break;
      default:
         unreachable("invalid BVH build method");
      }

      transient_mem_init_globals_size += sizeof(struct BatchedInitGlobalsData);
   }

   transient_total = align_transient_size(transient_total);
   transient_mem_init_globals_offset = transient_total;
   transient_total += align_transient_size(transient_mem_init_globals_size);

   size_t transient_mem_binnedsah_size = 0;
   size_t transient_mem_binnedsah_offset = 0;
   size_t private_mem_binnedsah_size = 0;
   size_t private_mem_binnedsah_offset = 0;

   transient_mem_binnedsah_size = get_batched_binnedsah_transient_mem_size(num_new_sah_builds);
   transient_mem_binnedsah_offset = transient_total;
   transient_total += align_transient_size(transient_mem_binnedsah_size);

   private_mem_binnedsah_size = get_batched_binnedsah_private_mem_size(num_new_sah_builds);
   private_mem_binnedsah_offset = private_mem_total;
   private_mem_total += align_private_size(private_mem_binnedsah_size);

   /* Allocate required memory */
   struct anv_cmd_alloc private_mem_alloc =
      anv_cmd_buffer_alloc_space(cmd_buffer, private_mem_total, 64);
   if (private_mem_total > 0 && anv_cmd_alloc_is_empty(private_mem_alloc)) {
      anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      goto error;
   }
   struct anv_cmd_alloc transient_mem_alloc =
      anv_cmd_buffer_alloc_space(cmd_buffer, transient_total, 64);
   if (transient_total > 0 && anv_cmd_alloc_is_empty(transient_mem_alloc)) {
      anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      goto error;
   }

   uint64_t private_base = anv_address_physical(private_mem_alloc.address);
   uint64_t transient_base = anv_address_physical(transient_mem_alloc.address);

   /* Prepare transient memory */
   for (uint32_t i = 0; i < infoCount; i++) {
      struct build_state *bs = &builds[i];

      const VkAccelerationStructureBuildGeometryInfoKHR *pInfo = &pInfos[i];

      const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos =
         ppBuildRangeInfos ? ppBuildRangeInfos[i] : NULL;

      struct Geo *geos = transient_mem_alloc.map + bs->state.geomDesc_buffer;
      uint32_t *prefixes = transient_mem_alloc.map + bs->geom_size_prefix_sum_buffer;
      uint32_t prefix_sum = 0;
      for (unsigned g = 0; g < bs->num_geometries; g++) {
         const VkAccelerationStructureGeometryKHR *pGeometry = get_geometry(pInfo, g);
         uint32_t prim_count = pBuildRangeInfos[g].primitiveCount;
         geos[g] = vk_to_grl_Geo(pGeometry, prim_count,
                                 pBuildRangeInfos[g].transformOffset,
                                 pBuildRangeInfos[g].primitiveOffset,
                                 pBuildRangeInfos[g].firstVertex);

         prefixes[g] = prefix_sum;
         prefix_sum += prim_count;
      }

      prefixes[bs->num_geometries] = prefix_sum;

      bs->geom_size_prefix_sum_buffer =
         intel_canonical_address(bs->geom_size_prefix_sum_buffer +
                                 transient_base);
      bs->state.geomDesc_buffer =
         intel_canonical_address(bs->state.geomDesc_buffer +
                                 transient_base);

      struct BatchedInitGlobalsData data = {
         .p_build_globals = bs->scratch.globals,
         .p_bvh_buffer = anv_address_physical(bs->bvh_addr),

         .numPrimitives = 0,
         .numGeometries = bs->num_geometries,
         .numInstances = bs->num_instances,

         .instance_descs_start = bs->estimate.instance_descs_start,
         .geo_meta_data_start = bs->estimate.geo_meta_data_start,
         .node_data_start = bs->estimate.node_data_start,
         .leaf_data_start = bs->estimate.leaf_data_start,
         .procedural_data_start = bs->estimate.procedural_data_start,
         .back_pointer_start = bs->estimate.back_pointer_start,
         .sizeTotal = bs->estimate.sizeTotal,

         .leafType = bs->state.leaf_type,
         .leafSize = bs->state.leaf_size,
      };

      write_memory(transient_mem_alloc,
                   transient_mem_init_globals_offset + i * sizeof(data),
                   &data, sizeof(data));
   }

   if (anv_cmd_buffer_is_render_queue(cmd_buffer))
      genX(flush_pipeline_select_gpgpu)(cmd_buffer);

   /* Due to the nature of GRL and its heavy use of jumps/predication, we
    * cannot tell exactly in what order the CFE_STATEs we insert are going to
    * be executed. So always use the largest possible size.
    */
   genX(cmd_buffer_ensure_cfe_state)(
      cmd_buffer,
      cmd_buffer->device->physical->max_grl_scratch_size);

   /* Round 1 : init_globals kernel */
   genX(grl_misc_batched_init_globals)(
      cmd_buffer,
      intel_canonical_address(transient_base +
                              transient_mem_init_globals_offset),
      infoCount);

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_GRL_FLUSH_FLAGS,
                             "building accel struct");

   /* Round 2 : Copy instance/geometry data from the application provided
    * buffers into the acceleration structures.
    */
   for (uint32_t i = 0; i < infoCount; i++) {
      struct build_state *bs = &builds[i];

      /* Metadata */
      if (bs->num_instances) {
         assert(bs->num_geometries == 0);

         const uint64_t copy_size = bs->num_instances * sizeof(InstanceDesc);
         /* This must be calculated in the same way as
          * groupCountForGeoMetaDataCopySize
          */
         const uint32_t num_threads = (copy_size >> 8) + 3;

         if (bs->array_of_instances_ptr) {
            genX(grl_misc_copy_instance_ptrs)(
               cmd_buffer,
               anv_address_physical(anv_address_add(bs->bvh_addr,
                                                    bs->estimate.instance_descs_start)),
               bs->instances_addr,
               copy_size, num_threads);
         } else {
            genX(grl_misc_copy_instances)(
               cmd_buffer,
               anv_address_physical(anv_address_add(bs->bvh_addr,
                                                    bs->estimate.instance_descs_start)),
               bs->instances_addr,
               copy_size, num_threads);
         }
      }

      if (bs->num_geometries) {
         assert(bs->num_instances == 0);
         const uint64_t copy_size = bs->num_geometries * sizeof(struct GeoMetaData);

         /* This must be calculated in the same way as
          * groupCountForGeoMetaDataCopySize
          */
         const uint32_t num_threads = (copy_size >> 6) + 1;

         genX(grl_misc_copy_geo_meta_data)(
            cmd_buffer,
            anv_address_physical(anv_address_add(bs->bvh_addr,
                                                 bs->estimate.geo_meta_data_start)),
            bs->state.geomDesc_buffer,
            copy_size,
            num_threads);
      }

      /* Primrefs */
      if (bs->num_instances) {
         if (bs->array_of_instances_ptr) {
            genX(grl_build_primref_buildPrimirefsFromInstancesArrOfPtrs)(
               cmd_buffer,
               bs->instances_addr,
               PREFIX_MK_SIZE(grl_build_primref, bs->estimate),
               PREFIX_MK_STATE(grl_build_primref, bs->state),
               false /* allowUpdate */);
         } else {
            genX(grl_build_primref_buildPrimirefsFromInstances)(
               cmd_buffer,
               bs->instances_addr,
               PREFIX_MK_SIZE(grl_build_primref, bs->estimate),
               PREFIX_MK_STATE(grl_build_primref, bs->state),
               false /* allowUpdate */);
         }
      }

      if (bs->num_geometries) {
         const VkAccelerationStructureBuildGeometryInfoKHR *pInfo = &pInfos[i];
         const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos =
            ppBuildRangeInfos ? ppBuildRangeInfos[i] : NULL;

         assert(pInfo->geometryCount == bs->num_geometries);
         for (unsigned g = 0; g < pInfo->geometryCount; g++) {
            const VkAccelerationStructureGeometryKHR *pGeometry =
               get_geometry(pInfo, g);

            switch (pGeometry->geometryType) {
            case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
               genX(grl_build_primref_primrefs_from_tris)(
                  cmd_buffer,
                  PREFIX_MK_STATE(grl_build_primref, bs->state),
                  PREFIX_MK_SIZE(grl_build_primref, bs->estimate),
                  bs->state.geomDesc_buffer + g * sizeof(struct Geo),
                  g,
                  vk_to_grl_GeometryFlags(pGeometry->flags),
                  /* TODO: Indirect */
                  pBuildRangeInfos[g].primitiveCount);
               break;

            case VK_GEOMETRY_TYPE_AABBS_KHR:
               genX(grl_build_primref_primrefs_from_proc)(
                  cmd_buffer,
                  PREFIX_MK_STATE(grl_build_primref, bs->state),
                  PREFIX_MK_SIZE(grl_build_primref, bs->estimate),
                  bs->state.geomDesc_buffer + g * sizeof(struct Geo),
                  g,
                  vk_to_grl_GeometryFlags(pGeometry->flags),
                  /* TODO: Indirect */
                  pBuildRangeInfos[g].primitiveCount);
               break;

            default:
               unreachable("Invalid geometry type");
            }
         }
      }
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_GRL_FLUSH_FLAGS,
                             "building accel struct");

   /* Dispatch trivial builds */
   if (num_trivial_builds) {
      for (uint32_t i = 0; i < infoCount; i++) {
         struct build_state *bs = &builds[i];

         if (bs->build_method != ANV_BVH_BUILD_METHOD_TRIVIAL)
            continue;

         genX(grl_new_sah_builder_single_pass_binsah)(
            cmd_buffer,
            bs->scratch.globals,
            bs->state.bvh_buffer,
            bs->state.build_primref_buffer,
            bs->scratch.leaf_index_buffers,
            false /* alloc_backpointers */);
      }
   }

   /* Dispatch new SAH builds */
   if (num_new_sah_builds) {
      size_t global_ptrs_offset = transient_mem_binnedsah_offset;
      size_t buffers_info_offset = transient_mem_binnedsah_offset + sizeof(gpuva_t) * num_new_sah_builds;

      size_t scheduler_offset = private_mem_binnedsah_offset;
      size_t sah_globals_offset = private_mem_binnedsah_offset + get_scheduler_size(num_new_sah_builds);

      struct SAHBuildArgsBatchable args = {
         .num_builds = infoCount,
         .p_globals_ptrs = intel_canonical_address(transient_base + global_ptrs_offset),
         .p_buffers_info = intel_canonical_address(transient_base + buffers_info_offset),
         .p_scheduler = intel_canonical_address(private_base + scheduler_offset),
         .p_sah_globals = intel_canonical_address(private_base + sah_globals_offset),
         .num_max_qnode_global_root_buffer_entries = MAX2(num_new_sah_builds, QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM),
      };

      for (uint32_t i = 0; i < infoCount; i++) {
         struct build_state *bs = &builds[i];

         if (bs->build_method != ANV_BVH_BUILD_METHOD_NEW_SAH)
            continue;

         uint64_t p_build_primref_index_buffers;
         uint64_t p_bvh2;
         uint64_t p_qnode_child_buffer;

         get_binnedsah_scratch_buffers(bs,
                                       &p_qnode_child_buffer,
                                       &p_build_primref_index_buffers,
                                       &p_bvh2);

         struct SAHBuildBuffersInfo buffers = {
            .p_primref_index_buffers = bs->scratch.leaf_index_buffers,
            .p_bvh_base = bs->state.bvh_buffer,
            .p_primrefs_buffer = bs->state.build_primref_buffer,
            .p_bvh2 = p_bvh2,
            .p_qnode_root_buffer = p_qnode_child_buffer,
            .sah_globals_flags = 0,
         };

         write_memory(transient_mem_alloc, buffers_info_offset, &buffers, sizeof(buffers));
         buffers_info_offset += sizeof(buffers);

         write_memory(transient_mem_alloc, global_ptrs_offset, &bs->state.build_globals,
                      sizeof(bs->state.build_globals));
         global_ptrs_offset += sizeof(bs->state.build_globals);
      }

      genX(grl_new_sah_builder_new_sah_build_batchable)(
         cmd_buffer, PREFIX_MK_SAH_BUILD_ARGS_BATCHABLE(grl_new_sah_builder, args));
   }

   if (num_new_sah_builds == 0)
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_GRL_FLUSH_FLAGS,
                                "building accel struct");

   /* Finally write the leaves. */
   for (uint32_t i = 0; i < infoCount; i++) {
      struct build_state *bs = &builds[i];

      if (bs->num_instances) {
         assert(bs->num_geometries == 0);
         if (bs->array_of_instances_ptr) {
            genX(grl_leaf_builder_buildLeafDXR_instances_pointers)(
               cmd_buffer,
               PREFIX_MK_STATE(grl_leaf_builder, bs->state),
               bs->scratch.leaf_index_buffers,
               bs->instances_addr,
               bs->scratch.leaf_index_buffer_stride,
               0 /* offset */,
               bs->estimate.numBuildPrimitives);
         } else {
            genX(grl_leaf_builder_buildLeafDXR_instances)(
               cmd_buffer,
               PREFIX_MK_STATE(grl_leaf_builder, bs->state),
               bs->scratch.leaf_index_buffers,
               bs->instances_addr,
               bs->scratch.leaf_index_buffer_stride,
               0 /* offset */,
               bs->estimate.numBuildPrimitives);
         }
      }

      if (bs->num_geometries) {
         assert(bs->num_instances == 0);
         const uint64_t p_numPrimitives =
            bs->state.build_globals + offsetof(struct Globals, numPrimitives);

         assert(bs->estimate.numProcedurals == 0 ||
                bs->estimate.numTriangles == 0);
         if (bs->estimate.numProcedurals) {
            genX(grl_leaf_builder_buildLeafDXR_procedurals)(
               cmd_buffer,
               PREFIX_MK_STATE(grl_leaf_builder, bs->state),
               bs->scratch.leaf_index_buffers,
               bs->scratch.leaf_index_buffer_stride,
               0 /* offset */,
               p_numPrimitives);
         } else {
            genX(grl_leaf_builder_buildLeafDXR_quads)(
               cmd_buffer,
               PREFIX_MK_STATE(grl_leaf_builder, bs->state),
               bs->scratch.leaf_index_buffers,
               bs->scratch.leaf_index_buffer_stride,
               0 /* offset */,
               p_numPrimitives,
               false /* allow_updates */);
         }
      }
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_GRL_FLUSH_FLAGS,
                             "building accel struct");

error:
   vk_free(&cmd_buffer->device->vk.alloc, builds);
}

void
genX(CmdBuildAccelerationStructuresKHR)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   if (anv_batch_has_error(&cmd_buffer->batch))
      return;

   cmd_build_acceleration_structures(cmd_buffer, infoCount, pInfos,
                                     ppBuildRangeInfos, NULL, NULL, NULL);
}

void
genX(CmdBuildAccelerationStructuresIndirectKHR)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkDeviceAddress*                      pIndirectDeviceAddresses,
    const uint32_t*                             pIndirectStrides,
    const uint32_t* const*                      ppMaxPrimitiveCounts)
{
   unreachable("Unimplemented");
}
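
/* A clone copies allocationSize bytes, read indirectly on the GPU from the
 * source BVH's metadata; a compacting copy goes through the dedicated GRL
 * compaction kernel instead.
 */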
void
genX(CmdCopyAccelerationStructureKHR)(
    VkCommandBuffer                             commandBuffer,
    const VkCopyAccelerationStructureInfoKHR*   pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(vk_acceleration_structure, src_accel, pInfo->src);
   ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dst);

   assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR ||
          pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR);

   if (pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR) {
      uint64_t src_size_addr =
         vk_acceleration_structure_get_va(src_accel) +
         offsetof(struct BVHBase, Meta.allocationSize);
      genX(grl_copy_clone_indirect)(
         cmd_buffer,
         vk_acceleration_structure_get_va(dst_accel),
         vk_acceleration_structure_get_va(src_accel),
         src_size_addr);
   } else {
      genX(grl_copy_compact)(
         cmd_buffer,
         vk_acceleration_structure_get_va(dst_accel),
         vk_acceleration_structure_get_va(src_accel));
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after copy acceleration struct");
}

void
genX(CmdCopyAccelerationStructureToMemoryKHR)(
    VkCommandBuffer                             commandBuffer,
    const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(vk_acceleration_structure, src_accel, pInfo->src);
   struct anv_device *device = cmd_buffer->device;
   uint64_t src_size_addr =
      vk_acceleration_structure_get_va(src_accel) +
      offsetof(struct BVHBase, Meta.allocationSize);

   assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR);

   genX(grl_copy_serialize_indirect)(
      cmd_buffer,
      pInfo->dst.deviceAddress,
      vk_acceleration_structure_get_va(src_accel),
      anv_address_physical(device->rt_uuid_addr),
      src_size_addr);

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after copy acceleration struct");
}

void
genX(CmdCopyMemoryToAccelerationStructureKHR)(
    VkCommandBuffer                             commandBuffer,
    const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dst);

   assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR);

   uint64_t src_size_addr = pInfo->src.deviceAddress +
      offsetof(struct SerializationHeader, DeserializedSizeInBytes);
   genX(grl_copy_deserialize_indirect)(
      cmd_buffer,
      vk_acceleration_structure_get_va(dst_accel),
      pInfo->src.deviceAddress,
      src_size_addr);

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after copy acceleration struct");
}

/* TODO: Host commands */

VkResult
genX(BuildAccelerationStructuresKHR)(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    uint32_t                                    infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyAccelerationStructureKHR)(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    const VkCopyAccelerationStructureInfoKHR*   pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyAccelerationStructureToMemoryKHR)(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyMemoryToAccelerationStructureKHR)(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(WriteAccelerationStructuresPropertiesKHR)(
    VkDevice                                    _device,
    uint32_t                                    accelerationStructureCount,
    const VkAccelerationStructureKHR*           pAccelerationStructures,
    VkQueryType                                 queryType,
    size_t                                      dataSize,
    void*                                       pData,
    size_t                                      stride)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

#endif /* GFX_VERx10 >= 125 */