radv: Build acceleration structures using LBVH

This sorts the leaf nodes along a morton curve before creating the internal
nodes. For reference:
https://developer.nvidia.com/blog/thinking-parallel-part-iii-tree-construction-gpu/

Ray query CTS:
Test run totals:
  Passed:        22418/23426 (95.7%)
  Failed:        0/23426 (0.0%)
  Not supported: 1008/23426 (4.3%)
  Warnings:      0/23426 (0.0%)
  Waived:        0/23426 (0.0%)

Signed-off-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15648>
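(Illustration for review purposes, not code from this change; helper names are
hypothetical.) The morton code interleaves the bits of the quantized leaf
centroid so that leaves that are close in space end up close in the sorted
order. A CPU-side C sketch of the same 24-bit construction the shaders below
build:

   #include <stdint.h>

   /* Spread the low 8 bits of v so two zero bits separate each original bit. */
   static uint32_t expand_bits_8(uint32_t v)
   {
      v = (v * 0x00000101u) & 0x0F00F00Fu;
      v = (v * 0x00000011u) & 0xC30C30C3u;
      v = (v * 0x00000005u) & 0x49249249u;
      return v;
   }

   /* 24-bit morton code for a centroid normalized to [0, 1]^3. */
   static uint32_t morton_code_24(float x, float y, float z)
   {
      uint32_t xi = (uint32_t)(x * 255.0f);
      uint32_t yi = (uint32_t)(y * 255.0f);
      uint32_t zi = (uint32_t)(z * 255.0f);
      return (expand_bits_8(xi) << 2) | (expand_bits_8(yi) << 1) | expand_bits_8(zi);
   }

Sorting the leaves by this code and then repeatedly collapsing runs of four
consecutive nodes into internal nodes is what turns the flat leaf list into a
spatially coherent BVH.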
@@ -29,12 +29,48 @@
#include "radv_cs.h"
#include "radv_meta.h"

#include "radix_sort/radv_radix_sort.h"

/* Min and max bounds of the bvh used to compute morton codes */
#define SCRATCH_TOTAL_BOUNDS_SIZE (6 * sizeof(float))

enum accel_struct_build {
   accel_struct_build_unoptimized,
   accel_struct_build_lbvh,
};

static enum accel_struct_build
get_accel_struct_build(const struct radv_physical_device *pdevice,
                       VkAccelerationStructureBuildTypeKHR buildType)
{
   if (buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
      return accel_struct_build_unoptimized;

   return (pdevice->rad_info.chip_class < GFX10) ? accel_struct_build_unoptimized
                                                 : accel_struct_build_lbvh;
}

static uint32_t
get_node_id_stride(enum accel_struct_build build_mode)
{
   switch (build_mode) {
   case accel_struct_build_unoptimized:
      return 4;
   case accel_struct_build_lbvh:
      return 8;
   default:
      unreachable("Unhandled accel_struct_build!");
   }
}
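
/* Each leaf gets one scratch entry per buffer: 4 bytes (just the node id) in
 * the unoptimized path, 8 bytes when building with LBVH, where an entry pairs
 * the node id with its morton code so the radix sort can reorder the entries
 * as 64-bit keyvals. */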

VKAPI_ATTR void VKAPI_CALL
radv_GetAccelerationStructureBuildSizesKHR(
   VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
   const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
   const uint32_t *pMaxPrimitiveCounts, VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   uint64_t triangles = 0, boxes = 0, instances = 0;

   STATIC_ASSERT(sizeof(struct radv_bvh_triangle_node) == 64);
@@ -79,9 +115,30 @@ radv_GetAccelerationStructureBuildSizesKHR(

   pSizeInfo->accelerationStructureSize = size;

   /* 2x the max number of nodes in a BVH layer (one uint32_t each) */
   pSizeInfo->updateScratchSize = pSizeInfo->buildScratchSize =
      MAX2(4096, 2 * (boxes + instances + triangles) * sizeof(uint32_t));
   /* 2x the max number of nodes in a BVH layer and order information for sorting when using
    * LBVH (one uint32_t each, two buffers) plus space to store the bounds.
    * LBVH is only supported for device builds and hardware that supports global atomics.
    */
   enum accel_struct_build build_mode = get_accel_struct_build(device->physical_device, buildType);
   uint32_t node_id_stride = get_node_id_stride(build_mode);

   uint32_t leaf_count = boxes + instances + triangles;
   VkDeviceSize scratchSize = 2 * leaf_count * node_id_stride;

   if (build_mode == accel_struct_build_lbvh) {
      radix_sort_vk_memory_requirements_t requirements;
      radix_sort_vk_get_memory_requirements(device->meta_state.accel_struct_build.radix_sort,
                                            leaf_count, &requirements);

      /* Make sure we have the space required by the radix sort. */
      scratchSize = MAX2(scratchSize, requirements.keyvals_size * 2);

      scratchSize += requirements.internal_size + SCRATCH_TOTAL_BOUNDS_SIZE;
   }

   scratchSize = MAX2(4096, scratchSize);
   pSizeInfo->updateScratchSize = scratchSize;
   pSizeInfo->buildScratchSize = scratchSize;
}

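/* Rough sizing example (illustrative numbers, not from this change): with one
 * million leaves in LBVH mode the two node-id buffers need
 * 2 * 1,000,000 * 8 B, about 16 MB. That is compared against twice the radix
 * sort's keyvals_size (itself roughly leaf_count * 8 B plus padding), and the
 * sort's internal scratch plus the 24-byte bounds are added on top. */
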
VKAPI_ATTR VkResult VKAPI_CALL
@@ -745,6 +802,19 @@ radv_CopyAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR defer
   return VK_SUCCESS;
}

static nir_builder
create_accel_build_shader(struct radv_device *device, const char *name)
{
   nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "%s", name);
   b.shader->info.workgroup_size[0] = 64;

   assert(b.shader->info.workgroup_size[1] == 1);
   assert(b.shader->info.workgroup_size[2] == 1);
   assert(!b.shader->info.workgroup_size_variable);

   return b;
}

static nir_ssa_def *
get_indices(nir_builder *b, nir_ssa_def *addr, nir_ssa_def *type, nir_ssa_def *id)
{
@@ -935,6 +1005,21 @@ struct build_primitive_constants {
   };
};

struct bounds_constants {
   uint64_t node_addr;
   uint64_t scratch_addr;
};

struct morton_constants {
   uint64_t node_addr;
   uint64_t scratch_addr;
};

struct fill_constants {
   uint64_t addr;
   uint32_t value;
};

struct build_internal_constants {
   uint64_t node_dst_addr;
   uint64_t scratch_addr;
@@ -972,6 +1057,29 @@ nir_invert_3x3(nir_builder *b, nir_ssa_def *in[3][3], nir_ssa_def *out[3][3])
   }
}

static nir_ssa_def *
id_to_node_id_offset(nir_builder *b, nir_ssa_def *global_id,
                     const struct radv_physical_device *pdevice)
{
   uint32_t stride = get_node_id_stride(
      get_accel_struct_build(pdevice, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR));

   return nir_umul24(b, global_id, nir_imm_int(b, stride));
}

static nir_ssa_def *
id_to_morton_offset(nir_builder *b, nir_ssa_def *global_id,
                    const struct radv_physical_device *pdevice)
{
   enum accel_struct_build build_mode =
      get_accel_struct_build(pdevice, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR);
   assert(build_mode == accel_struct_build_lbvh);

   uint32_t stride = get_node_id_stride(build_mode);

   return nir_iadd_imm(b, nir_umul24(b, global_id, nir_imm_int(b, stride)), sizeof(uint32_t));
}
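
/* Taken together: in LBVH mode each 8-byte scratch entry stores the node id at
 * offset 0 and the morton code at offset 4. */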

static nir_shader *
build_leaf_shader(struct radv_device *dev)
{
@@ -1003,9 +1111,15 @@ build_leaf_shader(struct radv_device *dev)
      nir_umul24(&b, nir_channels(&b, nir_load_workgroup_id(&b, 32), 1),
                 nir_imm_int(&b, b.shader->info.workgroup_size[0])),
      nir_channels(&b, nir_load_local_invocation_id(&b), 1));
   scratch_addr = nir_iadd(
      &b, scratch_addr,
      nir_u2u64(&b, nir_iadd(&b, scratch_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 4)))));
   nir_ssa_def *scratch_dst_addr =
      nir_iadd(&b, scratch_addr,
               nir_u2u64(&b, nir_iadd(&b, scratch_offset,
                                      id_to_node_id_offset(&b, global_id, dev->physical_device))));

   nir_variable *bounds[2] = {
      nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "min_bound"),
      nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "max_bound"),
   };

   nir_push_if(&b, nir_ieq_imm(&b, geom_type, VK_GEOMETRY_TYPE_TRIANGLES_KHR));
   { /* Triangles */
@@ -1053,6 +1167,22 @@ build_leaf_shader(struct radv_device *dev)
         for (unsigned j = 0; j < 3; ++j)
            node_data[i * 3 + j] = nir_fdph(&b, positions[i], nir_load_var(&b, transform[j]));

      nir_ssa_def *min_bound = NULL;
      nir_ssa_def *max_bound = NULL;
      for (unsigned i = 0; i < 3; ++i) {
         nir_ssa_def *position = nir_vec(&b, node_data + i * 3, 3);
         if (min_bound) {
            min_bound = nir_fmin(&b, min_bound, position);
            max_bound = nir_fmax(&b, max_bound, position);
         } else {
            min_bound = position;
            max_bound = position;
         }
      }

      nir_store_var(&b, bounds[0], min_bound, 7);
      nir_store_var(&b, bounds[1], max_bound, 7);

      node_data[12] = global_id;
      node_data[13] = geometry_id;
      node_data[15] = nir_imm_int(&b, 9);
@@ -1066,7 +1196,7 @@ build_leaf_shader(struct radv_device *dev)
      }

      nir_ssa_def *node_id = nir_ushr_imm(&b, node_offset, 3);
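      /* Node ids pack the node's byte offset divided by 8 with a type tag in
       * the low three bits: triangle nodes use type 0 here, while the +7, +6
       * and +5 seen elsewhere in this change tag AABB, instance and internal
       * (box) nodes. */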
      nir_build_store_global(&b, node_id, scratch_addr);
      nir_build_store_global(&b, node_id, scratch_dst_addr);
   }
   nir_push_else(&b, NULL);
   nir_push_if(&b, nir_ieq_imm(&b, geom_type, VK_GEOMETRY_TYPE_AABBS_KHR));
@@ -1077,14 +1207,18 @@ build_leaf_shader(struct radv_device *dev)
      nir_ssa_def *node_offset =
         nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 64)));
      nir_ssa_def *aabb_node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));

      nir_ssa_def *node_id = nir_iadd_imm(&b, nir_ushr_imm(&b, node_offset, 3), 7);
      nir_build_store_global(&b, node_id, scratch_addr);
      nir_build_store_global(&b, node_id, scratch_dst_addr);

      aabb_addr = nir_iadd(&b, aabb_addr, nir_u2u64(&b, nir_imul(&b, aabb_stride, global_id)));

      nir_ssa_def *min_bound = nir_build_load_global(&b, 3, 32, nir_iadd_imm(&b, aabb_addr, 0));
      nir_ssa_def *max_bound = nir_build_load_global(&b, 3, 32, nir_iadd_imm(&b, aabb_addr, 12));

      nir_store_var(&b, bounds[0], min_bound, 7);
      nir_store_var(&b, bounds[1], max_bound, 7);

      nir_ssa_def *values[] = {nir_channel(&b, min_bound, 0),
                               nir_channel(&b, min_bound, 1),
                               nir_channel(&b, min_bound, 2),
@@ -1130,16 +1264,9 @@ build_leaf_shader(struct radv_device *dev)
      nir_ssa_def *node_offset =
         nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 128)));
      node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));

      nir_ssa_def *node_id = nir_iadd_imm(&b, nir_ushr_imm(&b, node_offset, 3), 6);
      nir_build_store_global(&b, node_id, scratch_addr);

      nir_variable *bounds[2] = {
         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "min_bound"),
         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "max_bound"),
      };

      nir_store_var(&b, bounds[0], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
      nir_store_var(&b, bounds[1], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
      nir_build_store_global(&b, node_id, scratch_dst_addr);

      nir_ssa_def *header_addr = nir_pack_64_2x32(&b, nir_channels(&b, inst3, 12));
      nir_push_if(&b, nir_ine_imm(&b, header_addr, 0));
@@ -1204,6 +1331,32 @@ build_leaf_shader(struct radv_device *dev)
   nir_pop_if(&b, NULL);
   nir_pop_if(&b, NULL);

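   /* Accumulate the total bvh bounds in scratch for the morton pass: the
    * per-invocation bounds are reduced across the subgroup first, and a single
    * elected invocation issues the six global float atomics, keeping atomic
    * traffic to one lane per subgroup. */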
   if (get_accel_struct_build(dev->physical_device,
                              VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) !=
       accel_struct_build_unoptimized) {
      nir_ssa_def *min = nir_load_var(&b, bounds[0]);
      nir_ssa_def *max = nir_load_var(&b, bounds[1]);

      nir_ssa_def *min_reduced = nir_reduce(&b, min, .reduction_op = nir_op_fmin);
      nir_ssa_def *max_reduced = nir_reduce(&b, max, .reduction_op = nir_op_fmax);

      nir_push_if(&b, nir_elect(&b, 1));

      nir_global_atomic_fmin(&b, 32, nir_isub(&b, scratch_addr, nir_imm_int64(&b, 24)),
                             nir_channel(&b, min_reduced, 0));
      nir_global_atomic_fmin(&b, 32, nir_isub(&b, scratch_addr, nir_imm_int64(&b, 20)),
                             nir_channel(&b, min_reduced, 1));
      nir_global_atomic_fmin(&b, 32, nir_isub(&b, scratch_addr, nir_imm_int64(&b, 16)),
                             nir_channel(&b, min_reduced, 2));

      nir_global_atomic_fmax(&b, 32, nir_isub(&b, scratch_addr, nir_imm_int64(&b, 12)),
                             nir_channel(&b, max_reduced, 0));
      nir_global_atomic_fmax(&b, 32, nir_isub(&b, scratch_addr, nir_imm_int64(&b, 8)),
                             nir_channel(&b, max_reduced, 1));
      nir_global_atomic_fmax(&b, 32, nir_isub(&b, scratch_addr, nir_imm_int64(&b, 4)),
                             nir_channel(&b, max_reduced, 2));
   }

   return b.shader;
}

@@ -1267,6 +1420,89 @@ determine_bounds(nir_builder *b, nir_ssa_def *node_addr, nir_ssa_def *node_id,
   nir_pop_if(b, NULL);
}

/* https://developer.nvidia.com/blog/thinking-parallel-part-iii-tree-construction-gpu/ */
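/* build_morton_component below spreads the low 8 bits of x so that two zero
 * bits separate each original bit: each multiply replicates the value at a
 * shifted position (x * 0x101 == x | (x << 8), and so on) and each mask keeps
 * first nibbles, then bit pairs, then every third bit. 0xff becomes
 * 0b001001001001001001001001; interleaving three such components gives a
 * 24-bit morton code. */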
static nir_ssa_def *
build_morton_component(nir_builder *b, nir_ssa_def *x)
{
   x = nir_iand_imm(b, nir_imul_imm(b, x, 0x00000101u), 0x0F00F00Fu);
   x = nir_iand_imm(b, nir_imul_imm(b, x, 0x00000011u), 0xC30C30C3u);
   x = nir_iand_imm(b, nir_imul_imm(b, x, 0x00000005u), 0x49249249u);
   return x;
}

static nir_shader *
build_morton_shader(struct radv_device *dev)
{
   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);

   nir_builder b = create_accel_build_shader(dev, "accel_build_morton_shader");

   /*
    * push constants:
    *   i32 x 2: node address
    *   i32 x 2: scratch address
    */
   nir_ssa_def *pconst0 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);

   nir_ssa_def *node_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 0b0011));
   nir_ssa_def *scratch_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 0b1100));

   nir_ssa_def *global_id =
      nir_iadd(&b,
               nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0),
                            b.shader->info.workgroup_size[0]),
               nir_load_local_invocation_index(&b));

   nir_ssa_def *node_id_addr = nir_iadd(
      &b, scratch_addr, nir_u2u64(&b, id_to_node_id_offset(&b, global_id, dev->physical_device)));
   nir_ssa_def *node_id =
      nir_build_load_global(&b, 1, 32, node_id_addr, .align_mul = 4, .align_offset = 0);

   nir_variable *node_bounds[2] = {
      nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "min_bound"),
      nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "max_bound"),
   };

   determine_bounds(&b, node_addr, node_id, node_bounds);

   nir_ssa_def *node_min = nir_load_var(&b, node_bounds[0]);
   nir_ssa_def *node_max = nir_load_var(&b, node_bounds[1]);
   nir_ssa_def *node_pos =
      nir_fmul(&b, nir_fadd(&b, node_min, node_max), nir_imm_vec3(&b, 0.5, 0.5, 0.5));

   nir_ssa_def *bvh_min =
      nir_build_load_global(&b, 3, 32, nir_isub(&b, scratch_addr, nir_imm_int64(&b, 24)),
                            .align_mul = 4, .align_offset = 0);
   nir_ssa_def *bvh_max =
      nir_build_load_global(&b, 3, 32, nir_isub(&b, scratch_addr, nir_imm_int64(&b, 12)),
                            .align_mul = 4, .align_offset = 0);
   nir_ssa_def *bvh_size = nir_fsub(&b, bvh_max, bvh_min);

   nir_ssa_def *normalized_node_pos = nir_fdiv(&b, nir_fsub(&b, node_pos, bvh_min), bvh_size);

   nir_ssa_def *x_int =
      nir_f2u32(&b, nir_fmul_imm(&b, nir_channel(&b, normalized_node_pos, 0), 255.0));
   nir_ssa_def *x_morton = build_morton_component(&b, x_int);

   nir_ssa_def *y_int =
      nir_f2u32(&b, nir_fmul_imm(&b, nir_channel(&b, normalized_node_pos, 1), 255.0));
   nir_ssa_def *y_morton = build_morton_component(&b, y_int);

   nir_ssa_def *z_int =
      nir_f2u32(&b, nir_fmul_imm(&b, nir_channel(&b, normalized_node_pos, 2), 255.0));
   nir_ssa_def *z_morton = build_morton_component(&b, z_int);

   nir_ssa_def *morton_code = nir_iadd(
      &b, nir_iadd(&b, nir_ishl_imm(&b, x_morton, 2), nir_ishl_imm(&b, y_morton, 1)), z_morton);

   nir_ssa_def *dst_addr = nir_iadd(
      &b, scratch_addr, nir_u2u64(&b, id_to_morton_offset(&b, global_id, dev->physical_device)));
   nir_build_store_global(&b, morton_code, dst_addr, .align_mul = 4);

   return b.shader;
}

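/* The morton codes produced above are 24 bits wide (three 8-bit components),
 * matching the key_bits = 24 the radix sort is configured with later in this
 * change. */
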
static nir_shader *
build_internal_shader(struct radv_device *dev)
{
@@ -1308,12 +1544,22 @@ build_internal_shader(struct radv_device *dev)

   nir_ssa_def *node_offset = nir_iadd(&b, node_dst_offset, nir_ishl_imm(&b, global_id, 7));
   nir_ssa_def *node_dst_addr = nir_iadd(&b, node_addr, nir_u2u64(&b, node_offset));
   nir_ssa_def *src_nodes = nir_build_load_global(
      &b, 4, 32,
      nir_iadd(&b, scratch_addr,
               nir_u2u64(&b, nir_iadd(&b, src_scratch_offset, nir_ishl_imm(&b, global_id, 4)))));

   nir_build_store_global(&b, src_nodes, nir_iadd_imm(&b, node_dst_addr, 0));
   nir_ssa_def *src_base_addr =
      nir_iadd(&b, scratch_addr,
               nir_u2u64(&b, nir_iadd(&b, src_scratch_offset,
                                      id_to_node_id_offset(&b, src_idx, dev->physical_device))));

   enum accel_struct_build build_mode =
      get_accel_struct_build(dev->physical_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR);
   uint32_t node_id_stride = get_node_id_stride(build_mode);

   nir_ssa_def *src_nodes[4];
   for (uint32_t i = 0; i < 4; i++) {
      src_nodes[i] =
         nir_build_load_global(&b, 1, 32, nir_iadd_imm(&b, src_base_addr, i * node_id_stride));
      nir_build_store_global(&b, src_nodes[i], nir_iadd_imm(&b, node_dst_addr, i * 4));
   }

   nir_ssa_def *total_bounds[2] = {
      nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7),
@@ -1329,7 +1575,7 @@ build_internal_shader(struct radv_device *dev)
      nir_store_var(&b, bounds[1], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);

      nir_push_if(&b, nir_ilt(&b, nir_imm_int(&b, i), src_count));
      determine_bounds(&b, node_addr, nir_channel(&b, src_nodes, i), bounds);
      determine_bounds(&b, node_addr, src_nodes[i], bounds);
      nir_pop_if(&b, NULL);
      nir_build_store_global(&b, nir_load_var(&b, bounds[0]),
                             nir_iadd_imm(&b, node_dst_addr, 16 + 24 * i));
@@ -1342,7 +1588,8 @@ build_internal_shader(struct radv_device *dev)
   nir_ssa_def *node_id = nir_iadd_imm(&b, nir_ushr_imm(&b, node_offset, 3), 5);
   nir_ssa_def *dst_scratch_addr =
      nir_iadd(&b, scratch_addr,
               nir_u2u64(&b, nir_iadd(&b, dst_scratch_offset, nir_ishl_imm(&b, global_id, 2))));
               nir_u2u64(&b, nir_iadd(&b, dst_scratch_offset,
                                      id_to_node_id_offset(&b, global_id, dev->physical_device))));
   nir_build_store_global(&b, node_id, dst_scratch_addr);

   nir_push_if(&b, fill_header);
@@ -1586,12 +1833,82 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device)
                        &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.leaf_pipeline,
                        &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.morton_pipeline,
                        &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device),
                              state->accel_struct_build.copy_p_layout, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device),
                              state->accel_struct_build.internal_p_layout, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device),
                              state->accel_struct_build.leaf_p_layout, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device),
                              state->accel_struct_build.morton_p_layout, &state->alloc);

   if (state->accel_struct_build.radix_sort)
      radix_sort_vk_destroy(state->accel_struct_build.radix_sort, radv_device_to_handle(device),
                            &state->alloc);
}

static VkResult
create_build_pipeline(struct radv_device *device, nir_shader *shader, unsigned push_constant_size,
                      VkPipeline *pipeline, VkPipelineLayout *layout)
{
   const VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges =
         &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size},
   };

   VkResult result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
                                               &device->meta_state.alloc, layout);
   if (result != VK_SUCCESS) {
      radv_device_finish_accel_struct_build_state(device);
      ralloc_free(shader);
      return result;
   }

   VkPipelineShaderStageCreateInfo shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(shader),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = shader_stage,
      .flags = 0,
      .layout = *layout,
   };

   result = radv_CreateComputePipelines(radv_device_to_handle(device),
                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
                                        &pipeline_info, &device->meta_state.alloc, pipeline);

   if (result != VK_SUCCESS) {
      radv_device_finish_accel_struct_build_state(device);
      ralloc_free(shader);
      return result;
   }

   return VK_SUCCESS;
}

static void
radix_sort_fill_buffer(VkCommandBuffer commandBuffer,
                       radix_sort_vk_buffer_info_t const *buffer_info, VkDeviceSize offset,
                       VkDeviceSize size, uint32_t data)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

   assert(size % 4 == 0);
   assert(size != VK_WHOLE_SIZE);

   radv_fill_buffer_shader(cmd_buffer, buffer_info->devaddr + buffer_info->offset + offset, size,
                           data);
}
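
/* The radix sort library calls back into this function for its buffer fills;
 * since only device addresses (no VkBuffer handles) are provided here, the
 * fill goes through RADV's own meta fill shader instead of vkCmdFillBuffer. */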

VkResult
@@ -1602,122 +1919,47 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)
   nir_shader *internal_cs = build_internal_shader(device);
   nir_shader *copy_cs = build_copy_shader(device);

   const VkPipelineLayoutCreateInfo leaf_pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0,
                                                    sizeof(struct build_primitive_constants)},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.accel_struct_build.leaf_p_layout);
   result = create_build_pipeline(device, leaf_cs, sizeof(struct build_primitive_constants),
                                  &device->meta_state.accel_struct_build.leaf_pipeline,
                                  &device->meta_state.accel_struct_build.leaf_p_layout);
   if (result != VK_SUCCESS)
      goto fail;
      return result;

   VkPipelineShaderStageCreateInfo leaf_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(leaf_cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo leaf_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = leaf_shader_stage,
      .flags = 0,
      .layout = device->meta_state.accel_struct_build.leaf_p_layout,
   };

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &leaf_pipeline_info, NULL, &device->meta_state.accel_struct_build.leaf_pipeline);
   result = create_build_pipeline(device, internal_cs, sizeof(struct build_internal_constants),
                                  &device->meta_state.accel_struct_build.internal_pipeline,
                                  &device->meta_state.accel_struct_build.internal_p_layout);
   if (result != VK_SUCCESS)
      goto fail;
      return result;

   const VkPipelineLayoutCreateInfo internal_pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0,
                                                    sizeof(struct build_internal_constants)},
   };
   result = create_build_pipeline(device, copy_cs, sizeof(struct copy_constants),
                                  &device->meta_state.accel_struct_build.copy_pipeline,
                                  &device->meta_state.accel_struct_build.copy_p_layout);

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &internal_pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.accel_struct_build.internal_p_layout);
   if (result != VK_SUCCESS)
      goto fail;
      return result;

   VkPipelineShaderStageCreateInfo internal_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(internal_cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };
   if (get_accel_struct_build(device->physical_device,
                              VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ==
       accel_struct_build_lbvh) {
      nir_shader *morton_cs = build_morton_shader(device);

   VkComputePipelineCreateInfo internal_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = internal_shader_stage,
      .flags = 0,
      .layout = device->meta_state.accel_struct_build.internal_p_layout,
   };
      result = create_build_pipeline(device, morton_cs, sizeof(struct morton_constants),
                                     &device->meta_state.accel_struct_build.morton_pipeline,
                                     &device->meta_state.accel_struct_build.morton_p_layout);
      if (result != VK_SUCCESS)
         return result;

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &internal_pipeline_info, NULL, &device->meta_state.accel_struct_build.internal_pipeline);
   if (result != VK_SUCCESS)
      goto fail;
      device->meta_state.accel_struct_build.radix_sort =
         radv_create_radix_sort_u64(radv_device_to_handle(device), &device->meta_state.alloc,
                                    radv_pipeline_cache_to_handle(&device->meta_state.cache));

   const VkPipelineLayoutCreateInfo copy_pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges =
         &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct copy_constants)},
   };
      struct radix_sort_vk_sort_devaddr_info *radix_sort_info =
         &device->meta_state.accel_struct_build.radix_sort_info;
      radix_sort_info->ext = NULL;
      radix_sort_info->key_bits = 24;
      radix_sort_info->fill_buffer = radix_sort_fill_buffer;
   }

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &copy_pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.accel_struct_build.copy_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineShaderStageCreateInfo copy_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(copy_cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo copy_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = copy_shader_stage,
      .flags = 0,
      .layout = device->meta_state.accel_struct_build.copy_p_layout,
   };

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &copy_pipeline_info, NULL, &device->meta_state.accel_struct_build.copy_pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   ralloc_free(copy_cs);
   ralloc_free(internal_cs);
   ralloc_free(leaf_cs);

   return VK_SUCCESS;

fail:
   radv_device_finish_accel_struct_build_state(device);
   ralloc_free(copy_cs);
   ralloc_free(internal_cs);
   ralloc_free(leaf_cs);
   return result;
}

@@ -1725,6 +1967,8 @@ struct bvh_state {
   uint32_t node_offset;
   uint32_t node_count;
   uint32_t scratch_offset;
   uint32_t buffer_1_offset;
   uint32_t buffer_2_offset;

   uint32_t instance_offset;
   uint32_t instance_count;
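
   /* buffer_1_offset/buffer_2_offset locate the two keyval buffers the radix
    * sort ping-pongs between; scratch_offset tracks which one currently holds
    * the node ids consumed by the next internal-node pass. */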
@@ -1739,12 +1983,35 @@ radv_CmdBuildAccelerationStructuresKHR(
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_meta_saved_state saved_state;

   enum radv_cmd_flush_bits flush_bits =
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT,
                            NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT,
                            NULL);

   enum accel_struct_build build_mode = get_accel_struct_build(
      cmd_buffer->device->physical_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR);
   uint32_t node_id_stride = get_node_id_stride(build_mode);
   uint32_t scratch_offset =
      (build_mode != accel_struct_build_unoptimized) ? SCRATCH_TOTAL_BOUNDS_SIZE : 0;

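   /* Scratch layout for optimized builds: bytes [0, 24) hold the running total
    * bvh bounds (SCRATCH_TOTAL_BOUNDS_SIZE), followed by the two keyval
    * buffers and the radix sort's internal state (see the base_addr arithmetic
    * below). */
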
   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
   struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
   if (build_mode != accel_struct_build_unoptimized) {
      for (uint32_t i = 0; i < infoCount; ++i) {
         /* Clear the bvh bounds with nan. */
         radv_fill_buffer_shader(cmd_buffer, pInfos[i].scratchData.deviceAddress, 6 * sizeof(float),
                                 0x7FC00000);
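         /* 0x7FC00000 is the bit pattern of a 32-bit quiet NaN; the float
          * min/max atomics in the leaf shader pick the non-NaN operand, so the
          * first real leaf bound replaces the cleared value. */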
      }

      cmd_buffer->state.flush_bits |= flush_bits;
   }

   radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.leaf_pipeline);

   for (uint32_t i = 0; i < infoCount; ++i) {
@@ -1753,7 +2020,7 @@ radv_CmdBuildAccelerationStructuresKHR(

      struct build_primitive_constants prim_consts = {
         .node_dst_addr = radv_accel_struct_get_va(accel_struct),
         .scratch_addr = pInfos[i].scratchData.deviceAddress,
         .scratch_addr = pInfos[i].scratchData.deviceAddress + scratch_offset,
         .dst_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64) + 128,
         .dst_scratch_offset = 0,
      };
@@ -1805,20 +2072,91 @@ radv_CmdBuildAccelerationStructuresKHR(
            unreachable("Unknown geometryType");
         }

         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                               cmd_buffer->device->meta_state.accel_struct_build.leaf_p_layout,
                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(prim_consts),
                               &prim_consts);
         radv_CmdPushConstants(
            commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.leaf_p_layout,
            VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(prim_consts), &prim_consts);
         radv_unaligned_dispatch(cmd_buffer, ppBuildRangeInfos[i][j].primitiveCount, 1, 1);
         prim_consts.dst_offset += prim_size * ppBuildRangeInfos[i][j].primitiveCount;
         prim_consts.dst_scratch_offset += 4 * ppBuildRangeInfos[i][j].primitiveCount;
         prim_consts.dst_scratch_offset +=
            node_id_stride * ppBuildRangeInfos[i][j].primitiveCount;
      }
   }
      bvh_states[i].node_offset = prim_consts.dst_offset;
      bvh_states[i].node_count = prim_consts.dst_scratch_offset / 4;
      bvh_states[i].node_count = prim_consts.dst_scratch_offset / node_id_stride;
   }

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
   if (build_mode == accel_struct_build_lbvh) {
      cmd_buffer->state.flush_bits |= flush_bits;

      radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                           cmd_buffer->device->meta_state.accel_struct_build.morton_pipeline);

      for (uint32_t i = 0; i < infoCount; ++i) {
         RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,
                          pInfos[i].dstAccelerationStructure);

         const struct morton_constants consts = {
            .node_addr = radv_accel_struct_get_va(accel_struct),
            .scratch_addr = pInfos[i].scratchData.deviceAddress + SCRATCH_TOTAL_BOUNDS_SIZE,
         };

         radv_CmdPushConstants(commandBuffer,
                               cmd_buffer->device->meta_state.accel_struct_build.morton_p_layout,
                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
         radv_unaligned_dispatch(cmd_buffer, bvh_states[i].node_count, 1, 1);
      }

      cmd_buffer->state.flush_bits |= flush_bits;

      for (uint32_t i = 0; i < infoCount; ++i) {
         struct radix_sort_vk_memory_requirements requirements;
         radix_sort_vk_get_memory_requirements(
            cmd_buffer->device->meta_state.accel_struct_build.radix_sort, bvh_states[i].node_count,
            &requirements);

         struct radix_sort_vk_sort_devaddr_info info =
            cmd_buffer->device->meta_state.accel_struct_build.radix_sort_info;
         info.count = bvh_states[i].node_count;

         VkDeviceAddress base_addr =
            pInfos[i].scratchData.deviceAddress + SCRATCH_TOTAL_BOUNDS_SIZE;

         info.keyvals_even.buffer = VK_NULL_HANDLE;
         info.keyvals_even.offset = 0;
         info.keyvals_even.devaddr = base_addr;

         info.keyvals_odd = base_addr + requirements.keyvals_size;

         info.internal.buffer = VK_NULL_HANDLE;
         info.internal.offset = 0;
         info.internal.devaddr = base_addr + requirements.keyvals_size * 2;

         VkDeviceAddress result_addr;
         radix_sort_vk_sort_devaddr(cmd_buffer->device->meta_state.accel_struct_build.radix_sort,
                                    &info, radv_device_to_handle(cmd_buffer->device), commandBuffer,
                                    &result_addr);

         assert(result_addr == info.keyvals_even.devaddr || result_addr == info.keyvals_odd);

         if (result_addr == info.keyvals_even.devaddr) {
            bvh_states[i].buffer_1_offset = SCRATCH_TOTAL_BOUNDS_SIZE;
            bvh_states[i].buffer_2_offset = SCRATCH_TOTAL_BOUNDS_SIZE + requirements.keyvals_size;
         } else {
            bvh_states[i].buffer_1_offset = SCRATCH_TOTAL_BOUNDS_SIZE + requirements.keyvals_size;
            bvh_states[i].buffer_2_offset = SCRATCH_TOTAL_BOUNDS_SIZE;
         }
         bvh_states[i].scratch_offset = bvh_states[i].buffer_1_offset;
      }
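
      /* radix_sort_vk_sort_devaddr reports which keyval buffer received the
       * sorted data (it alternates between the two), so both offsets are
       * recorded and the internal-node passes start reading from the sorted
       * one. */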

      cmd_buffer->state.flush_bits |= flush_bits;
   } else {
      for (uint32_t i = 0; i < infoCount; ++i) {
         bvh_states[i].buffer_1_offset = 0;
         bvh_states[i].buffer_2_offset = bvh_states[i].node_count * 4;
      }
   }

   radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.internal_pipeline);
   bool progress = true;
   for (unsigned iter = 0; progress; ++iter) {
@@ -1830,18 +2168,20 @@ radv_CmdBuildAccelerationStructuresKHR(
         if (iter && bvh_states[i].node_count == 1)
            continue;

         if (!progress) {
            cmd_buffer->state.flush_bits |=
               RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
               radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL) |
               radv_dst_access_flush(cmd_buffer,
                                     VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
         }
         if (!progress)
            cmd_buffer->state.flush_bits |= flush_bits;

         progress = true;

         uint32_t dst_node_count = MAX2(1, DIV_ROUND_UP(bvh_states[i].node_count, 4));
         bool final_iter = dst_node_count == 1;

         uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
         uint32_t dst_scratch_offset = src_scratch_offset ? 0 : bvh_states[i].node_count * 4;
         uint32_t buffer_1_offset = bvh_states[i].buffer_1_offset;
         uint32_t buffer_2_offset = bvh_states[i].buffer_2_offset;
         uint32_t dst_scratch_offset =
            (src_scratch_offset == buffer_1_offset) ? buffer_2_offset : buffer_1_offset;

         uint32_t dst_node_offset = bvh_states[i].node_offset;
         if (final_iter)
            dst_node_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64);
@@ -1855,7 +2195,7 @@ radv_CmdBuildAccelerationStructuresKHR(
            .fill_header = bvh_states[i].node_count | (final_iter ? 0x80000000U : 0),
         };

         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
         radv_CmdPushConstants(commandBuffer,
                               cmd_buffer->device->meta_state.accel_struct_build.internal_p_layout,
                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
         radv_unaligned_dispatch(cmd_buffer, dst_node_count, 1, 1);
@@ -82,6 +82,8 @@
#include "radv_shader_args.h"
#include "sid.h"

#include "radix_sort/radix_sort_vk_devaddr.h"

/* Pre-declarations needed for WSI entrypoints */
struct wl_surface;
struct wl_display;
@@ -661,10 +663,15 @@ struct radv_meta_state {
   struct {
      VkPipelineLayout leaf_p_layout;
      VkPipeline leaf_pipeline;
      VkPipelineLayout morton_p_layout;
      VkPipeline morton_pipeline;
      VkPipelineLayout internal_p_layout;
      VkPipeline internal_pipeline;
      VkPipelineLayout copy_p_layout;
      VkPipeline copy_pipeline;

      struct radix_sort_vk *radix_sort;
      struct radix_sort_vk_sort_devaddr_info radix_sort_info;
   } accel_struct_build;

   struct {