radv: Use proper matrices for instance nodes.

Converts both the wto and otw matrices to full row-major 3x4 matrices (three
rows of four floats, with the translation in the last column).
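
To illustrate what "proper" means here (a sketch, not code from this change;
assumes a row-major 3x4 matrix with the translation in column 3):

/* Old scheme: column 3 of wto_matrix held the object->world translation t
 * and was applied as a pre-translation: object = R_wto * (world - t). */
static void wto_apply_old(const float m[3][4], const float p[3], float out[3])
{
   for (int r = 0; r < 3; ++r)
      out[r] = m[r][0] * (p[0] - m[0][3]) +
               m[r][1] * (p[1] - m[1][3]) +
               m[r][2] * (p[2] - m[2][3]);
}

/* New scheme: wto_matrix is an ordinary affine transform:
 * object = R_wto * world + t_wto. */
static void wto_apply_new(const float m[3][4], const float p[3], float out[3])
{
   for (int r = 0; r < 3; ++r)
      out[r] = m[r][0] * p[0] + m[r][1] * p[1] + m[r][2] * p[2] + m[r][3];
}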

Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18692>

commit 3c09681edd (parent 0f9fb8e15f)
Author:    Bas Nieuwenhuizen
Date:      2022-09-15 02:06:57 +02:00
Committed: Marge Bot

8 changed files with 56 additions and 130 deletions

@@ -264,13 +264,13 @@ calculate_instance_node_bounds(radv_bvh_instance_node instance)
    radv_accel_struct_header header = DEREF(REF(radv_accel_struct_header)(instance.base_ptr));
 
    for (uint32_t comp = 0; comp < 3; ++comp) {
-      aabb.min[comp] = instance.wto_matrix[3 + 4 * comp];
-      aabb.max[comp] = instance.wto_matrix[3 + 4 * comp];
+      aabb.min[comp] = instance.otw_matrix[comp][3];
+      aabb.max[comp] = instance.otw_matrix[comp][3];
       for (uint32_t col = 0; col < 3; ++col) {
-         aabb.min[comp] += min(instance.otw_matrix[col + comp * 3] * header.aabb[0][col],
-                               instance.otw_matrix[col + comp * 3] * header.aabb[1][col]);
-         aabb.max[comp] += max(instance.otw_matrix[col + comp * 3] * header.aabb[0][col],
-                               instance.otw_matrix[col + comp * 3] * header.aabb[1][col]);
+         aabb.min[comp] += min(instance.otw_matrix[comp][col] * header.aabb[0][col],
+                               instance.otw_matrix[comp][col] * header.aabb[1][col]);
+         aabb.max[comp] += max(instance.otw_matrix[comp][col] * header.aabb[0][col],
+                               instance.otw_matrix[comp][col] * header.aabb[1][col]);
       }
    }
    return aabb;

@@ -94,18 +94,12 @@ struct radv_bvh_instance_node {
    /* lower 24 bits are the sbt offset, upper 8 bits are VkGeometryInstanceFlagsKHR */
    uint32_t sbt_offset_and_flags;
 
-   /* The translation component is actually a pre-translation instead of a post-translation. If you
-    * want to get a proper matrix out of it you need to apply the directional component of the
-    * matrix to it. The pre-translation of the world->object matrix is the same as the
-    * post-translation of the object->world matrix so this way we can share data between both
-    * matrices. */
-   float wto_matrix[12];
-   uint32_t reserved[6];
+   float wto_matrix[3][4];
    uint32_t instance_id;
+   uint32_t reserved[3];
 
-   /* Object to world matrix transposed from the initial transform. Translate part is store in the
-    * wto_matrix. */
-   float otw_matrix[9];
+   /* Object to world matrix transposed from the initial transform. */
+   float otw_matrix[3][4];
 };
 
 struct radv_bvh_box16_node {
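
A minimal layout sketch of the new node (hypothetical standalone C, not the
driver header: the leading base_ptr and two packed dwords are assumed from the
loads elsewhere in this commit, the field name custom_instance_and_mask is a
guess, and the 128-byte size is implied by the old hard-coded instance_id and
otw_matrix offsets of 88 and 92):

#include <stddef.h>
#include <stdint.h>

struct instance_node {
   uint64_t base_ptr;
   uint32_t custom_instance_and_mask; /* assumed name for the dword at offset 8 */
   uint32_t sbt_offset_and_flags;
   float    wto_matrix[3][4];
   uint32_t instance_id;
   uint32_t reserved[3];
   float    otw_matrix[3][4];
};

_Static_assert(offsetof(struct instance_node, wto_matrix) == 16, "rows at 16/32/48");
_Static_assert(offsetof(struct instance_node, instance_id) == 64, "replaces hard-coded 88");
_Static_assert(offsetof(struct instance_node, otw_matrix) == 80, "rows 16 bytes apart");
_Static_assert(sizeof(struct instance_node) == 128, "node size unchanged");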

@@ -197,17 +197,13 @@ build_instance(inout AABB bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t g
          transform[col][row] = instance.transform[col + row * 4];
    mat4 inv_transform = inverse(transform);
 
-   for (uint32_t col = 0; col < 3; col++)
-      for (uint32_t row = 0; row < 3; row++)
-         DEREF(node).wto_matrix[col + row * 4] = inv_transform[col][row];
-
-   DEREF(node).wto_matrix[3] = transform[3][0];
-   DEREF(node).wto_matrix[7] = transform[3][1];
-   DEREF(node).wto_matrix[11] = transform[3][2];
+   for (uint32_t row = 0; row < 3; row++)
+      for (uint32_t col = 0; col < 4; col++)
+         DEREF(node).wto_matrix[row][col] = inv_transform[col][row];
 
-   for (uint32_t col = 0; col < 3; col++)
+   for (uint32_t col = 0; col < 4; col++)
       for (uint32_t row = 0; row < 3; row++)
-         DEREF(node).otw_matrix[col + row * 3] = transform[col][row];
+         DEREF(node).otw_matrix[row][col] = transform[col][row];
 
    radv_accel_struct_header instance_header =
       DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference));
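
The GLSL above fills both node matrices from a column-major mat4; the same
index shuffle as standalone C (a sketch for clarity, not the shader code):

/* inv[col][row] is column-major like GLSL's mat4; wto is row-major 3x4.
 * Writing wto[row][col] = inv[col][row] transposes into row-major order,
 * and col == 3 lands the translation in the last column. */
static void fill_wto(const float inv[4][4], float wto[3][4])
{
   for (int row = 0; row < 3; ++row)
      for (int col = 0; col < 4; ++col)
         wto[row][col] = inv[col][row];
}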

@@ -431,32 +431,23 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars,
      nir_ssa_def *instance_node_addr =
         nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.instance_addr),
                   rq_load_var(b, index, vars->candidate.instance_addr));
-      nir_ssa_def *wto_matrix[] = {
-         nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_node_addr, 16), .align_mul = 64,
-                               .align_offset = 16),
-         nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_node_addr, 32), .align_mul = 64,
-                               .align_offset = 32),
-         nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_node_addr, 48), .align_mul = 64,
-                               .align_offset = 48)};
-      return nir_build_vec3_mat_mult_pre(b, rq_load_var(b, index, vars->origin), wto_matrix);
+      nir_ssa_def *wto_matrix[3];
+      nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
+      return nir_build_vec3_mat_mult(b, rq_load_var(b, index, vars->origin), wto_matrix, true);
   }
   case nir_ray_query_value_intersection_object_to_world: {
      nir_ssa_def *instance_node_addr =
         nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.instance_addr),
                   rq_load_var(b, index, vars->candidate.instance_addr));
-      if (column == 3) {
-         nir_ssa_def *wto_matrix[3];
-         nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
-
-         nir_ssa_def *vals[3];
-         for (unsigned i = 0; i < 3; ++i)
-            vals[i] = nir_channel(b, wto_matrix[i], column);
-
-         return nir_vec(b, vals, 3);
-      }
-      return nir_build_load_global(b, 3, 32, nir_iadd_imm(b, instance_node_addr, 92 + column * 12));
+      nir_ssa_def *rows[3];
+      for (unsigned r = 0; r < 3; ++r)
+         rows[r] = nir_build_load_global(
+            b, 4, 32,
+            nir_iadd_imm(b, instance_node_addr,
+                         offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16));
+
+      return nir_vec3(b, nir_channel(b, rows[0], column), nir_channel(b, rows[1], column),
+                      nir_channel(b, rows[2], column));
   }
   case nir_ray_query_value_intersection_primitive_index:
      return nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.primitive_id),
@@ -480,9 +471,6 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars,
      for (unsigned i = 0; i < 3; ++i)
         vals[i] = nir_channel(b, wto_matrix[i], column);
 
-      if (column == 3)
-         return nir_fneg(b, nir_build_vec3_mat_mult(b, nir_vec(b, vals, 3), wto_matrix, false));
-
      return nir_vec(b, vals, 3);
   }
   case nir_ray_query_value_tmin:
@@ -728,15 +716,12 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars
            }
            nir_pop_if(b, NULL);
 
-            nir_ssa_def *wto_matrix[] = {
-               nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_node_addr, 16),
-                                     .align_mul = 64, .align_offset = 16),
-               nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_node_addr, 32),
-                                     .align_mul = 64, .align_offset = 32),
-               nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_node_addr, 48),
-                                     .align_mul = 64, .align_offset = 48)};
-            nir_ssa_def *instance_id =
-               nir_build_load_global(b, 1, 32, nir_iadd_imm(b, instance_node_addr, 88));
+            nir_ssa_def *wto_matrix[3];
+            nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
+            nir_ssa_def *instance_id = nir_build_load_global(
+               b, 1, 32,
+               nir_iadd_imm(b, instance_node_addr,
+                            offsetof(struct radv_bvh_instance_node, instance_id)));
 
            rq_store_var(b, index, vars->trav.top_stack,
                         rq_load_var(b, index, vars->trav.stack), 1);
@@ -751,8 +736,8 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars
                         nir_iadd_imm(b, rq_load_var(b, index, vars->trav.stack), 1), 1);
 
            rq_store_var(b, index, vars->trav.origin,
-                        nir_build_vec3_mat_mult_pre(b, rq_load_var(b, index, vars->origin),
-                                                    wto_matrix),
+                        nir_build_vec3_mat_mult(b, rq_load_var(b, index, vars->origin),
+                                                wto_matrix, true),
                         7);
            rq_store_var(b, index, vars->trav.direction,
                         nir_build_vec3_mat_mult(b, rq_load_var(b, index, vars->direction),
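
The object-to-world path above fetches whole 16-byte rows and then picks one
lane per row, because the intrinsic is column-indexed while the node stores
the matrix row-major. As plain C (a sketch, not the NIR builder code):

/* Gather column c of a row-major 3x4 matrix: one element from each row. */
static void otw_column(const float m[3][4], unsigned c, float out[3])
{
   out[0] = m[0][c];
   out[1] = m[1][c];
   out[2] = m[2][c];
}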

@@ -611,43 +611,28 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
                  vals[i] = nir_channel(&b_shader, wto_matrix[i], c);
 
               ret = nir_vec(&b_shader, vals, 3);
-              if (c == 3)
-                 ret = nir_fneg(&b_shader,
-                                nir_build_vec3_mat_mult(&b_shader, ret, wto_matrix, false));
               break;
            }
            case nir_intrinsic_load_ray_object_to_world: {
               unsigned c = nir_intrinsic_column(intr);
               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
-              if (c == 3) {
-                 nir_ssa_def *wto_matrix[3];
-                 nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
-
-                 nir_ssa_def *vals[3];
-                 for (unsigned i = 0; i < 3; ++i)
-                    vals[i] = nir_channel(&b_shader, wto_matrix[i], c);
-
-                 ret = nir_vec(&b_shader, vals, 3);
-              } else {
-                 ret = nir_build_load_global(
-                    &b_shader, 3, 32, nir_iadd_imm(&b_shader, instance_node_addr, 92 + c * 12));
-              }
+              nir_ssa_def *rows[3];
+              for (unsigned r = 0; r < 3; ++r)
+                 rows[r] = nir_build_load_global(
+                    &b_shader, 4, 32,
+                    nir_iadd_imm(&b_shader, instance_node_addr,
+                                 offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16));
+              ret =
+                 nir_vec3(&b_shader, nir_channel(&b_shader, rows[0], c),
+                          nir_channel(&b_shader, rows[1], c), nir_channel(&b_shader, rows[2], c));
               break;
            }
            case nir_intrinsic_load_ray_object_origin: {
               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
-              nir_ssa_def *wto_matrix[] = {
-                 nir_build_load_global(&b_shader, 4, 32,
-                                       nir_iadd_imm(&b_shader, instance_node_addr, 16),
-                                       .align_mul = 64, .align_offset = 16),
-                 nir_build_load_global(&b_shader, 4, 32,
-                                       nir_iadd_imm(&b_shader, instance_node_addr, 32),
-                                       .align_mul = 64, .align_offset = 32),
-                 nir_build_load_global(&b_shader, 4, 32,
-                                       nir_iadd_imm(&b_shader, instance_node_addr, 48),
-                                       .align_mul = 64, .align_offset = 48)};
-              ret = nir_build_vec3_mat_mult_pre(
-                 &b_shader, nir_load_var(&b_shader, vars->origin), wto_matrix);
+              nir_ssa_def *wto_matrix[3];
+              nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
+              ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->origin),
+                                            wto_matrix, true);
               break;
            }
            case nir_intrinsic_load_ray_object_direction: {
@@ -1539,17 +1524,14 @@ build_traversal_shader(struct radv_device *device,
         {
            /* instance */
            nir_ssa_def *instance_node_addr = build_node_to_addr(device, &b, bvh_node);
+           nir_ssa_def *wto_matrix[3];
+           nir_build_wto_matrix_load(&b, instance_node_addr, wto_matrix);
 
            nir_ssa_def *instance_data =
               nir_build_load_global(&b, 4, 32, instance_node_addr, .align_mul = 64);
-           nir_ssa_def *wto_matrix[] = {
-              nir_build_load_global(&b, 4, 32, nir_iadd_imm(&b, instance_node_addr, 16),
-                                    .align_mul = 64, .align_offset = 16),
-              nir_build_load_global(&b, 4, 32, nir_iadd_imm(&b, instance_node_addr, 32),
-                                    .align_mul = 64, .align_offset = 32),
-              nir_build_load_global(&b, 4, 32, nir_iadd_imm(&b, instance_node_addr, 48),
-                                    .align_mul = 64, .align_offset = 48)};
-           nir_ssa_def *instance_id =
-              nir_build_load_global(&b, 1, 32, nir_iadd_imm(&b, instance_node_addr, 88));
+           nir_ssa_def *instance_id = nir_build_load_global(
+              &b, 1, 32,
+              nir_iadd_imm(&b, instance_node_addr,
+                           offsetof(struct radv_bvh_instance_node, instance_id)));
 
            nir_ssa_def *instance_and_mask = nir_channel(&b, instance_data, 2);
            nir_ssa_def *instance_mask = nir_ushr_imm(&b, instance_and_mask, 24);
@@ -1568,7 +1550,7 @@ build_traversal_shader(struct radv_device *device,
 
            nir_store_var(
               &b, trav_vars.origin,
-              nir_build_vec3_mat_mult_pre(&b, nir_load_var(&b, vars.origin), wto_matrix), 7);
+              nir_build_vec3_mat_mult(&b, nir_load_var(&b, vars.origin), wto_matrix, true), 7);
            nir_store_var(
               &b, trav_vars.dir,
               nir_build_vec3_mat_mult(&b, nir_load_var(&b, vars.direction), wto_matrix, false),

@@ -510,25 +510,8 @@ rra_transcode_instance_node(struct rra_instance_node *dst, const struct radv_bvh
    dst->instance_id = src->instance_id;
    dst->blas_metadata_size = sizeof(struct rra_accel_struct_metadata);
 
-   float full_otw_matrix[16];
-   for (int row = 0; row < 3; ++row)
-      for (int col = 0; col < 3; ++col)
-         full_otw_matrix[row * 4 + col] = src->otw_matrix[row * 3 + col];
-
-   full_otw_matrix[3] = src->wto_matrix[3];
-   full_otw_matrix[7] = src->wto_matrix[7];
-   full_otw_matrix[11] = src->wto_matrix[11];
-
-   full_otw_matrix[12] = 0.0f;
-   full_otw_matrix[13] = 0.0f;
-   full_otw_matrix[14] = 0.0f;
-   full_otw_matrix[15] = 1.0f;
-
-   float full_wto_matrix[16];
-   util_invert_mat4x4(full_wto_matrix, full_otw_matrix);
-
-   memcpy(dst->wto_matrix, full_wto_matrix, sizeof(dst->wto_matrix));
-   memcpy(dst->otw_matrix, full_otw_matrix, sizeof(dst->otw_matrix));
+   memcpy(dst->wto_matrix, src->wto_matrix, sizeof(dst->wto_matrix));
+   memcpy(dst->otw_matrix, src->otw_matrix, sizeof(dst->otw_matrix));
 }
 
 static void

@@ -375,18 +375,6 @@ nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[],
    return nir_vec(b, result_components, 3);
 }
 
-nir_ssa_def *
-nir_build_vec3_mat_mult_pre(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[])
-{
-   nir_ssa_def *result_components[3] = {
-      nir_channel(b, matrix[0], 3),
-      nir_channel(b, matrix[1], 3),
-      nir_channel(b, matrix[2], 3),
-   };
-   return nir_build_vec3_mat_mult(b, nir_fsub(b, vec, nir_vec(b, result_components, 3)), matrix,
-                                  false);
-}
-
 void
 nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out)
 {
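
With wto_matrix now a true affine transform, the deleted helper is redundant:
subtracting the shared translation and multiplying without it yields the same
point as a single multiply-with-translation against the new matrix. In scalar
form (a sketch of the equivalence, not driver code):

#include <stdbool.h>

/* Per-row scalar form of nir_build_vec3_mat_mult for a row-major 3x4 matrix. */
static float mat_mult_row(const float row[4], const float v[3], bool translation)
{
   float s = row[0] * v[0] + row[1] * v[1] + row[2] * v[2];
   return translation ? s + row[3] : s;
}

/* The removed _pre helper computed R * (v - t), with t taken from column 3.
 * A true world->object matrix stores t' = -R * t in column 3, and
 * R * (v - t) == R * v + t', so every _pre call site becomes
 * nir_build_vec3_mat_mult(..., true). */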

@@ -52,8 +52,6 @@ nir_ssa_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_
 
 nir_ssa_def *nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[],
                                      bool translation);
-nir_ssa_def *nir_build_vec3_mat_mult_pre(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[]);
-
 void nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out);
 
 nir_ssa_def *hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, nir_ssa_def *flags,