radv: Use proper matrices for instance nodes.

Converts both wto and otw matrices to be full row-major 4x3 matrices.

Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18692>
This commit is contained in:
Bas Nieuwenhuizen
2022-09-15 02:06:57 +02:00
committed by Marge Bot
parent 0f9fb8e15f
commit 3c09681edd
8 changed files with 56 additions and 130 deletions

View File

@@ -264,13 +264,13 @@ calculate_instance_node_bounds(radv_bvh_instance_node instance)
radv_accel_struct_header header = DEREF(REF(radv_accel_struct_header)(instance.base_ptr));
for (uint32_t comp = 0; comp < 3; ++comp) {
aabb.min[comp] = instance.wto_matrix[3 + 4 * comp];
aabb.max[comp] = instance.wto_matrix[3 + 4 * comp];
aabb.min[comp] = instance.otw_matrix[comp][3];
aabb.max[comp] = instance.otw_matrix[comp][3];
for (uint32_t col = 0; col < 3; ++col) {
aabb.min[comp] += min(instance.otw_matrix[col + comp * 3] * header.aabb[0][col],
instance.otw_matrix[col + comp * 3] * header.aabb[1][col]);
aabb.max[comp] += max(instance.otw_matrix[col + comp * 3] * header.aabb[0][col],
instance.otw_matrix[col + comp * 3] * header.aabb[1][col]);
aabb.min[comp] += min(instance.otw_matrix[comp][col] * header.aabb[0][col],
instance.otw_matrix[comp][col] * header.aabb[1][col]);
aabb.max[comp] += max(instance.otw_matrix[comp][col] * header.aabb[0][col],
instance.otw_matrix[comp][col] * header.aabb[1][col]);
}
}
return aabb;

View File

@@ -94,18 +94,12 @@ struct radv_bvh_instance_node {
/* lower 24 bits are the sbt offset, upper 8 bits are VkGeometryInstanceFlagsKHR */
uint32_t sbt_offset_and_flags;
/* The translation component is actually a pre-translation instead of a post-translation. If you
* want to get a proper matrix out of it you need to apply the directional component of the
* matrix to it. The pre-translation of the world->object matrix is the same as the
* post-translation of the object->world matrix so this way we can share data between both
* matrices. */
float wto_matrix[12];
uint32_t reserved[6];
float wto_matrix[3][4];
uint32_t instance_id;
uint32_t reserved[3];
/* Object to world matrix transposed from the initial transform. The translation part is stored in
 * the wto_matrix. */
float otw_matrix[9];
/* Object to world matrix transposed from the initial transform. */
float otw_matrix[3][4];
};
struct radv_bvh_box16_node {

View File

@@ -197,17 +197,13 @@ build_instance(inout AABB bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t g
transform[col][row] = instance.transform[col + row * 4];
mat4 inv_transform = inverse(transform);
for (uint32_t col = 0; col < 3; col++)
for (uint32_t row = 0; row < 3; row++)
DEREF(node).wto_matrix[col + row * 4] = inv_transform[col][row];
for (uint32_t row = 0; row < 3; row++)
for (uint32_t col = 0; col < 4; col++)
DEREF(node).wto_matrix[row][col] = inv_transform[col][row];
DEREF(node).wto_matrix[3] = transform[3][0];
DEREF(node).wto_matrix[7] = transform[3][1];
DEREF(node).wto_matrix[11] = transform[3][2];
for (uint32_t col = 0; col < 3; col++)
for (uint32_t col = 0; col < 4; col++)
for (uint32_t row = 0; row < 3; row++)
DEREF(node).otw_matrix[col + row * 3] = transform[col][row];
DEREF(node).otw_matrix[row][col] = transform[col][row];
radv_accel_struct_header instance_header =
DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference));

View File

@@ -431,32 +431,23 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars,
nir_ssa_def *instance_node_addr =
nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.instance_addr),
rq_load_var(b, index, vars->candidate.instance_addr));
nir_ssa_def *wto_matrix[] = {
nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_node_addr, 16), .align_mul = 64,
.align_offset = 16),
nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_node_addr, 32), .align_mul = 64,
.align_offset = 32),
nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_node_addr, 48), .align_mul = 64,
.align_offset = 48)};
return nir_build_vec3_mat_mult_pre(b, rq_load_var(b, index, vars->origin), wto_matrix);
nir_ssa_def *wto_matrix[3];
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
return nir_build_vec3_mat_mult(b, rq_load_var(b, index, vars->origin), wto_matrix, true);
}
case nir_ray_query_value_intersection_object_to_world: {
nir_ssa_def *instance_node_addr =
nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.instance_addr),
rq_load_var(b, index, vars->candidate.instance_addr));
nir_ssa_def *rows[3];
for (unsigned r = 0; r < 3; ++r)
rows[r] = nir_build_load_global(
b, 4, 32,
nir_iadd_imm(b, instance_node_addr,
offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16));
if (column == 3) {
nir_ssa_def *wto_matrix[3];
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
nir_ssa_def *vals[3];
for (unsigned i = 0; i < 3; ++i)
vals[i] = nir_channel(b, wto_matrix[i], column);
return nir_vec(b, vals, 3);
}
return nir_build_load_global(b, 3, 32, nir_iadd_imm(b, instance_node_addr, 92 + column * 12));
return nir_vec3(b, nir_channel(b, rows[0], column), nir_channel(b, rows[1], column),
nir_channel(b, rows[2], column));
}
case nir_ray_query_value_intersection_primitive_index:
return nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.primitive_id),
@@ -480,9 +471,6 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars,
for (unsigned i = 0; i < 3; ++i)
vals[i] = nir_channel(b, wto_matrix[i], column);
if (column == 3)
return nir_fneg(b, nir_build_vec3_mat_mult(b, nir_vec(b, vals, 3), wto_matrix, false));
return nir_vec(b, vals, 3);
}
case nir_ray_query_value_tmin:
@@ -728,15 +716,12 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars
}
nir_pop_if(b, NULL);
nir_ssa_def *wto_matrix[] = {
nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_node_addr, 16),
.align_mul = 64, .align_offset = 16),
nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_node_addr, 32),
.align_mul = 64, .align_offset = 32),
nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_node_addr, 48),
.align_mul = 64, .align_offset = 48)};
nir_ssa_def *instance_id =
nir_build_load_global(b, 1, 32, nir_iadd_imm(b, instance_node_addr, 88));
nir_ssa_def *wto_matrix[3];
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
nir_ssa_def *instance_id = nir_build_load_global(
b, 1, 32,
nir_iadd_imm(b, instance_node_addr,
offsetof(struct radv_bvh_instance_node, instance_id)));
rq_store_var(b, index, vars->trav.top_stack,
rq_load_var(b, index, vars->trav.stack), 1);
@@ -751,8 +736,8 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars
nir_iadd_imm(b, rq_load_var(b, index, vars->trav.stack), 1), 1);
rq_store_var(b, index, vars->trav.origin,
nir_build_vec3_mat_mult_pre(b, rq_load_var(b, index, vars->origin),
wto_matrix),
nir_build_vec3_mat_mult(b, rq_load_var(b, index, vars->origin),
wto_matrix, true),
7);
rq_store_var(b, index, vars->trav.direction,
nir_build_vec3_mat_mult(b, rq_load_var(b, index, vars->direction),

View File

@@ -611,43 +611,28 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
vals[i] = nir_channel(&b_shader, wto_matrix[i], c);
ret = nir_vec(&b_shader, vals, 3);
if (c == 3)
ret = nir_fneg(&b_shader,
nir_build_vec3_mat_mult(&b_shader, ret, wto_matrix, false));
break;
}
case nir_intrinsic_load_ray_object_to_world: {
unsigned c = nir_intrinsic_column(intr);
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
if (c == 3) {
nir_ssa_def *wto_matrix[3];
nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
nir_ssa_def *vals[3];
for (unsigned i = 0; i < 3; ++i)
vals[i] = nir_channel(&b_shader, wto_matrix[i], c);
ret = nir_vec(&b_shader, vals, 3);
} else {
ret = nir_build_load_global(
&b_shader, 3, 32, nir_iadd_imm(&b_shader, instance_node_addr, 92 + c * 12));
}
nir_ssa_def *rows[3];
for (unsigned r = 0; r < 3; ++r)
rows[r] = nir_build_load_global(
&b_shader, 4, 32,
nir_iadd_imm(&b_shader, instance_node_addr,
offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16));
ret =
nir_vec3(&b_shader, nir_channel(&b_shader, rows[0], c),
nir_channel(&b_shader, rows[1], c), nir_channel(&b_shader, rows[2], c));
break;
}
case nir_intrinsic_load_ray_object_origin: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *wto_matrix[] = {
nir_build_load_global(&b_shader, 4, 32,
nir_iadd_imm(&b_shader, instance_node_addr, 16),
.align_mul = 64, .align_offset = 16),
nir_build_load_global(&b_shader, 4, 32,
nir_iadd_imm(&b_shader, instance_node_addr, 32),
.align_mul = 64, .align_offset = 32),
nir_build_load_global(&b_shader, 4, 32,
nir_iadd_imm(&b_shader, instance_node_addr, 48),
.align_mul = 64, .align_offset = 48)};
ret = nir_build_vec3_mat_mult_pre(
&b_shader, nir_load_var(&b_shader, vars->origin), wto_matrix);
nir_ssa_def *wto_matrix[3];
nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->origin),
wto_matrix, true);
break;
}
case nir_intrinsic_load_ray_object_direction: {
@@ -1539,17 +1524,14 @@ build_traversal_shader(struct radv_device *device,
{
/* instance */
nir_ssa_def *instance_node_addr = build_node_to_addr(device, &b, bvh_node);
nir_ssa_def *wto_matrix[3];
nir_build_wto_matrix_load(&b, instance_node_addr, wto_matrix);
nir_ssa_def *instance_data =
nir_build_load_global(&b, 4, 32, instance_node_addr, .align_mul = 64);
nir_ssa_def *wto_matrix[] = {
nir_build_load_global(&b, 4, 32, nir_iadd_imm(&b, instance_node_addr, 16),
.align_mul = 64, .align_offset = 16),
nir_build_load_global(&b, 4, 32, nir_iadd_imm(&b, instance_node_addr, 32),
.align_mul = 64, .align_offset = 32),
nir_build_load_global(&b, 4, 32, nir_iadd_imm(&b, instance_node_addr, 48),
.align_mul = 64, .align_offset = 48)};
nir_ssa_def *instance_id =
nir_build_load_global(&b, 1, 32, nir_iadd_imm(&b, instance_node_addr, 88));
nir_ssa_def *instance_id = nir_build_load_global(
&b, 1, 32,
nir_iadd_imm(&b, instance_node_addr,
offsetof(struct radv_bvh_instance_node, instance_id)));
nir_ssa_def *instance_and_mask = nir_channel(&b, instance_data, 2);
nir_ssa_def *instance_mask = nir_ushr_imm(&b, instance_and_mask, 24);
@@ -1568,7 +1550,7 @@ build_traversal_shader(struct radv_device *device,
nir_store_var(
&b, trav_vars.origin,
nir_build_vec3_mat_mult_pre(&b, nir_load_var(&b, vars.origin), wto_matrix), 7);
nir_build_vec3_mat_mult(&b, nir_load_var(&b, vars.origin), wto_matrix, true), 7);
nir_store_var(
&b, trav_vars.dir,
nir_build_vec3_mat_mult(&b, nir_load_var(&b, vars.direction), wto_matrix, false),

View File

@@ -510,25 +510,8 @@ rra_transcode_instance_node(struct rra_instance_node *dst, const struct radv_bvh
dst->instance_id = src->instance_id;
dst->blas_metadata_size = sizeof(struct rra_accel_struct_metadata);
float full_otw_matrix[16];
for (int row = 0; row < 3; ++row)
for (int col = 0; col < 3; ++col)
full_otw_matrix[row * 4 + col] = src->otw_matrix[row * 3 + col];
full_otw_matrix[3] = src->wto_matrix[3];
full_otw_matrix[7] = src->wto_matrix[7];
full_otw_matrix[11] = src->wto_matrix[11];
full_otw_matrix[12] = 0.0f;
full_otw_matrix[13] = 0.0f;
full_otw_matrix[14] = 0.0f;
full_otw_matrix[15] = 1.0f;
float full_wto_matrix[16];
util_invert_mat4x4(full_wto_matrix, full_otw_matrix);
memcpy(dst->wto_matrix, full_wto_matrix, sizeof(dst->wto_matrix));
memcpy(dst->otw_matrix, full_otw_matrix, sizeof(dst->otw_matrix));
memcpy(dst->wto_matrix, src->wto_matrix, sizeof(dst->wto_matrix));
memcpy(dst->otw_matrix, src->otw_matrix, sizeof(dst->otw_matrix));
}
static void

View File

@@ -375,18 +375,6 @@ nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[],
return nir_vec(b, result_components, 3);
}
nir_ssa_def *
nir_build_vec3_mat_mult_pre(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[])
{
nir_ssa_def *result_components[3] = {
nir_channel(b, matrix[0], 3),
nir_channel(b, matrix[1], 3),
nir_channel(b, matrix[2], 3),
};
return nir_build_vec3_mat_mult(b, nir_fsub(b, vec, nir_vec(b, result_components, 3)), matrix,
false);
}
void
nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out)
{

View File

@@ -52,8 +52,6 @@ nir_ssa_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_
nir_ssa_def *nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[],
bool translation);
nir_ssa_def *nir_build_vec3_mat_mult_pre(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[]);
void nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out);
nir_ssa_def *hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, nir_ssa_def *flags,