radv: Move ray flag compares out of the loop.

This saves on the and+cmp combos that were otherwise done with VALU instructions inside the loop.

Totals from 7 (0.01% of 134913) affected shaders:

CodeSize: 208476 -> 209216 (+0.35%)
Instrs: 38384 -> 38402 (+0.05%)
Latency: 805725 -> 804537 (-0.15%)
InvThroughput: 165906 -> 165663 (-0.15%)
Copies: 4936 -> 4919 (-0.34%)
PreSGPRs: 393 -> 430 (+9.41%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19706>
This commit is contained in:
Bas Nieuwenhuizen
2022-11-11 22:43:25 +01:00
committed by Marge Bot
parent e2dadda35f
commit 0a26975840
4 changed files with 68 additions and 54 deletions

View File

@@ -322,10 +322,14 @@ copy_candidate_to_closest(nir_builder *b, nir_ssa_def *index, struct ray_query_v
static void static void
insert_terminate_on_first_hit(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars, insert_terminate_on_first_hit(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars,
bool break_on_terminate) const struct radv_ray_flags *ray_flags, bool break_on_terminate)
{ {
nir_ssa_def *terminate_on_first_hit = nir_ssa_def *terminate_on_first_hit;
nir_test_mask(b, rq_load_var(b, index, vars->flags), SpvRayFlagsTerminateOnFirstHitKHRMask); if (ray_flags)
terminate_on_first_hit = ray_flags->terminate_on_first_hit;
else
terminate_on_first_hit = nir_test_mask(b, rq_load_var(b, index, vars->flags),
SpvRayFlagsTerminateOnFirstHitKHRMask);
nir_push_if(b, terminate_on_first_hit); nir_push_if(b, terminate_on_first_hit);
{ {
rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, false), 0x1); rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, false), 0x1);
@@ -340,7 +344,7 @@ lower_rq_confirm_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic_
struct ray_query_vars *vars) struct ray_query_vars *vars)
{ {
copy_candidate_to_closest(b, index, vars); copy_candidate_to_closest(b, index, vars);
insert_terminate_on_first_hit(b, index, vars, false); insert_terminate_on_first_hit(b, index, vars, NULL, false);
} }
static void static void
@@ -351,7 +355,7 @@ lower_rq_generate_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic
nir_fge(b, instr->src[1].ssa, rq_load_var(b, index, vars->tmin)))); nir_fge(b, instr->src[1].ssa, rq_load_var(b, index, vars->tmin))));
{ {
copy_candidate_to_closest(b, index, vars); copy_candidate_to_closest(b, index, vars);
insert_terminate_on_first_hit(b, index, vars, false); insert_terminate_on_first_hit(b, index, vars, NULL, false);
rq_store_var(b, index, vars->closest.t, instr->src[1].ssa, 0x1); rq_store_var(b, index, vars->closest.t, instr->src[1].ssa, 0x1);
} }
nir_pop_if(b, NULL); nir_pop_if(b, NULL);
@@ -604,7 +608,8 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
static void static void
handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection, handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection,
const struct radv_ray_traversal_args *args) const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags)
{ {
struct traversal_data *data = args->data; struct traversal_data *data = args->data;
struct ray_query_vars *vars = data->vars; struct ray_query_vars *vars = data->vars;
@@ -623,7 +628,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
nir_push_if(b, intersection->base.opaque); nir_push_if(b, intersection->base.opaque);
{ {
copy_candidate_to_closest(b, index, vars); copy_candidate_to_closest(b, index, vars);
insert_terminate_on_first_hit(b, index, vars, true); insert_terminate_on_first_hit(b, index, vars, ray_flags, true);
} }
nir_push_else(b, NULL); nir_push_else(b, NULL);
{ {

View File

@@ -387,9 +387,9 @@ nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_de
/* When a hit is opaque the any_hit shader is skipped for this hit and the hit /* When a hit is opaque the any_hit shader is skipped for this hit and the hit
* is assumed to be an actual hit. */ * is assumed to be an actual hit. */
nir_ssa_def * static nir_ssa_def *
hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, nir_ssa_def *flags, hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags,
nir_ssa_def *geometry_id_and_flags) const struct radv_ray_flags *ray_flags, nir_ssa_def *geometry_id_and_flags)
{ {
nir_ssa_def *geom_force_opaque = nir_ssa_def *geom_force_opaque =
nir_test_mask(b, geometry_id_and_flags, VK_GEOMETRY_OPAQUE_BIT_KHR << 28); nir_test_mask(b, geometry_id_and_flags, VK_GEOMETRY_OPAQUE_BIT_KHR << 28);
@@ -402,11 +402,8 @@ hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, nir_ssa_def *fl
opaque = nir_bcsel(b, instance_force_opaque, nir_imm_bool(b, true), opaque); opaque = nir_bcsel(b, instance_force_opaque, nir_imm_bool(b, true), opaque);
opaque = nir_bcsel(b, instance_force_non_opaque, nir_imm_bool(b, false), opaque); opaque = nir_bcsel(b, instance_force_non_opaque, nir_imm_bool(b, false), opaque);
nir_ssa_def *ray_force_opaque = nir_test_mask(b, flags, SpvRayFlagsOpaqueKHRMask); opaque = nir_bcsel(b, ray_flags->force_opaque, nir_imm_bool(b, true), opaque);
nir_ssa_def *ray_force_non_opaque = nir_test_mask(b, flags, SpvRayFlagsNoOpaqueKHRMask); opaque = nir_bcsel(b, ray_flags->force_not_opaque, nir_imm_bool(b, false), opaque);
opaque = nir_bcsel(b, ray_force_opaque, nir_imm_bool(b, true), opaque);
opaque = nir_bcsel(b, ray_force_non_opaque, nir_imm_bool(b, false), opaque);
return opaque; return opaque;
} }
@@ -424,7 +421,8 @@ create_bvh_descriptor(nir_builder *b)
static void static void
insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, insert_traversal_triangle_case(struct radv_device *device, nir_builder *b,
const struct radv_ray_traversal_args *args, nir_ssa_def *result, const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags, nir_ssa_def *result,
nir_ssa_def *bvh_node) nir_ssa_def *bvh_node)
{ {
if (!args->triangle_cb) if (!args->triangle_cb)
@@ -443,15 +441,9 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b,
VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR << 24); VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR << 24);
intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw); intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw);
nir_ssa_def *not_cull = nir_ssa_def *not_cull = ray_flags->no_skip_triangles;
nir_inot(b, nir_test_mask(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask));
nir_ssa_def *not_facing_cull = nir_ssa_def *not_facing_cull =
nir_ieq_imm(b, nir_bcsel(b, intersection.frontface, ray_flags->no_cull_front, ray_flags->no_cull_back);
nir_iand(b, args->flags,
nir_bcsel(b, intersection.frontface,
nir_imm_int(b, SpvRayFlagsCullFrontFacingTrianglesKHRMask),
nir_imm_int(b, SpvRayFlagsCullBackFacingTrianglesKHRMask))),
0);
not_cull = nir_iand( not_cull = nir_iand(
b, not_cull, b, not_cull,
@@ -471,22 +463,18 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b,
intersection.base.primitive_id = nir_channel(b, triangle_info, 0); intersection.base.primitive_id = nir_channel(b, triangle_info, 0);
intersection.base.geometry_id_and_flags = nir_channel(b, triangle_info, 1); intersection.base.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
intersection.base.opaque = intersection.base.opaque =
hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), args->flags, hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags,
intersection.base.geometry_id_and_flags); intersection.base.geometry_id_and_flags);
not_cull = nir_ieq_imm(b, not_cull = nir_bcsel(b, intersection.base.opaque, ray_flags->no_cull_opaque,
nir_iand(b, args->flags, ray_flags->no_cull_no_opaque);
nir_bcsel(b, intersection.base.opaque,
nir_imm_int(b, SpvRayFlagsCullOpaqueKHRMask),
nir_imm_int(b, SpvRayFlagsCullNoOpaqueKHRMask))),
0);
nir_push_if(b, not_cull); nir_push_if(b, not_cull);
{ {
nir_ssa_def *divs[2] = {div, div}; nir_ssa_def *divs[2] = {div, div};
intersection.barycentrics = intersection.barycentrics =
nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2)); nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2));
args->triangle_cb(b, &intersection, args); args->triangle_cb(b, &intersection, args, ray_flags);
} }
nir_pop_if(b, NULL); nir_pop_if(b, NULL);
} }
@@ -497,7 +485,8 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b,
static void static void
insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, insert_traversal_aabb_case(struct radv_device *device, nir_builder *b,
const struct radv_ray_traversal_args *args, nir_ssa_def *bvh_node) const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags, nir_ssa_def *bvh_node)
{ {
if (!args->aabb_cb) if (!args->aabb_cb)
return; return;
@@ -509,18 +498,11 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b,
intersection.primitive_id = nir_channel(b, triangle_info, 0); intersection.primitive_id = nir_channel(b, triangle_info, 0);
intersection.geometry_id_and_flags = nir_channel(b, triangle_info, 1); intersection.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
intersection.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), intersection.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
args->flags, intersection.geometry_id_and_flags); ray_flags, intersection.geometry_id_and_flags);
nir_ssa_def *not_skip_aabb = nir_ssa_def *not_cull =
nir_inot(b, nir_test_mask(b, args->flags, SpvRayFlagsSkipAABBsKHRMask)); nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
nir_ssa_def *not_cull = nir_iand( not_cull = nir_iand(b, not_cull, ray_flags->no_skip_aabbs);
b, not_skip_aabb,
nir_ieq_imm(
b,
nir_iand(b, args->flags,
nir_bcsel(b, intersection.opaque, nir_imm_int(b, SpvRayFlagsCullOpaqueKHRMask),
nir_imm_int(b, SpvRayFlagsCullNoOpaqueKHRMask))),
0));
nir_push_if(b, not_cull); nir_push_if(b, not_cull);
{ {
args->aabb_cb(b, &intersection, args); args->aabb_cb(b, &intersection, args);
@@ -548,6 +530,24 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
nir_ssa_def *desc = create_bvh_descriptor(b); nir_ssa_def *desc = create_bvh_descriptor(b);
nir_ssa_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0); nir_ssa_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);
struct radv_ray_flags ray_flags = {
.force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask),
.force_not_opaque = nir_test_mask(b, args->flags, SpvRayFlagsNoOpaqueKHRMask),
.terminate_on_first_hit =
nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask),
.no_cull_front = nir_ieq_imm(
b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0),
.no_cull_back = nir_ieq_imm(
b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0),
.no_cull_opaque =
nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0),
.no_cull_no_opaque =
nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0),
.no_skip_triangles =
nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask), 0),
.no_skip_aabbs =
nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0),
};
nir_push_loop(b); nir_push_loop(b);
{ {
nir_push_if( nir_push_if(
@@ -648,7 +648,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
{ {
nir_push_if(b, nir_ieq_imm(b, node_type, radv_bvh_node_aabb)); nir_push_if(b, nir_ieq_imm(b, node_type, radv_bvh_node_aabb));
{ {
insert_traversal_aabb_case(device, b, args, global_bvh_node); insert_traversal_aabb_case(device, b, args, &ray_flags, global_bvh_node);
} }
nir_push_else(b, NULL); nir_push_else(b, NULL);
{ {
@@ -761,7 +761,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
nir_load_deref(b, args->vars.inv_dir)); nir_load_deref(b, args->vars.inv_dir));
} }
insert_traversal_triangle_case(device, b, args, result, global_bvh_node); insert_traversal_triangle_case(device, b, args, &ray_flags, result, global_bvh_node);
} }
nir_pop_if(b, NULL); nir_pop_if(b, NULL);
} }

View File

@@ -54,13 +54,22 @@ nir_ssa_def *nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_d
void nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out); void nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out);
nir_ssa_def *hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, nir_ssa_def *flags,
nir_ssa_def *geometry_id_and_flags);
nir_ssa_def *create_bvh_descriptor(nir_builder *b); nir_ssa_def *create_bvh_descriptor(nir_builder *b);
struct radv_ray_traversal_args; struct radv_ray_traversal_args;
/* Conditions derived from the ray flags (args->flags). radv_build_ray_traversal
 * fills this in once, before pushing the traversal loop, and passes a pointer
 * to the triangle/AABB case helpers and to the triangle callback so they can
 * reuse the precomputed values instead of re-testing the flags.
 *
 * The "no_*" members are stored inverted: each one is the result of comparing
 * (flags & mask) against 0, i.e. it is set when the corresponding flag bit is
 * NOT present. */
struct radv_ray_flags {
/* nir_test_mask of SpvRayFlagsOpaqueKHRMask. */
nir_ssa_def *force_opaque;
/* nir_test_mask of SpvRayFlagsNoOpaqueKHRMask. */
nir_ssa_def *force_not_opaque;
/* nir_test_mask of SpvRayFlagsTerminateOnFirstHitKHRMask. */
nir_ssa_def *terminate_on_first_hit;
/* (flags & SpvRayFlagsCullFrontFacingTrianglesKHRMask) == 0. */
nir_ssa_def *no_cull_front;
/* (flags & SpvRayFlagsCullBackFacingTrianglesKHRMask) == 0. */
nir_ssa_def *no_cull_back;
/* (flags & SpvRayFlagsCullOpaqueKHRMask) == 0. */
nir_ssa_def *no_cull_opaque;
/* (flags & SpvRayFlagsCullNoOpaqueKHRMask) == 0. */
nir_ssa_def *no_cull_no_opaque;
/* (flags & SpvRayFlagsSkipTrianglesKHRMask) == 0. */
nir_ssa_def *no_skip_triangles;
/* (flags & SpvRayFlagsSkipAABBsKHRMask) == 0. */
nir_ssa_def *no_skip_aabbs;
};
struct radv_leaf_intersection { struct radv_leaf_intersection {
nir_ssa_def *node_addr; nir_ssa_def *node_addr;
nir_ssa_def *primitive_id; nir_ssa_def *primitive_id;
@@ -82,7 +91,8 @@ struct radv_triangle_intersection {
typedef void (*radv_triangle_intersection_cb)(nir_builder *b, typedef void (*radv_triangle_intersection_cb)(nir_builder *b,
struct radv_triangle_intersection *intersection, struct radv_triangle_intersection *intersection,
const struct radv_ray_traversal_args *args); const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags);
typedef void (*radv_rt_stack_store_cb)(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, typedef void (*radv_rt_stack_store_cb)(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value,
const struct radv_ray_traversal_args *args); const struct radv_ray_traversal_args *args);

View File

@@ -999,7 +999,8 @@ struct traversal_data {
static void static void
handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection, handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection,
const struct radv_ray_traversal_args *args) const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags)
{ {
struct traversal_data *data = args->data; struct traversal_data *data = args->data;
@@ -1056,10 +1057,8 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
nir_store_var(b, data->vars->idx, sbt_idx, 1); nir_store_var(b, data->vars->idx, sbt_idx, 1);
nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1); nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1);
nir_ssa_def *terminate_on_first_hit =
nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask);
nir_ssa_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate); nir_ssa_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate);
nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated)); nir_push_if(b, nir_ior(b, ray_flags->terminate_on_first_hit, ray_terminated));
{ {
nir_jump(b, nir_jump_break); nir_jump(b, nir_jump_break);
} }