radv: Move ray flag compares out of the loop.
This saves on and+cmp combos that are executed with VALU instructions.

Totals from 7 (0.01% of 134913) affected shaders:
CodeSize: 208476 -> 209216 (+0.35%)
Instrs: 38384 -> 38402 (+0.05%)
Latency: 805725 -> 804537 (-0.15%)
InvThroughput: 165906 -> 165663 (-0.15%)
Copies: 4936 -> 4919 (-0.34%)
PreSGPRs: 393 -> 430 (+9.41%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19706>
This commit is contained in:

committed by
Marge Bot

parent
e2dadda35f
commit
0a26975840
@@ -322,10 +322,14 @@ copy_candidate_to_closest(nir_builder *b, nir_ssa_def *index, struct ray_query_v
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
insert_terminate_on_first_hit(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars,
|
insert_terminate_on_first_hit(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars,
|
||||||
bool break_on_terminate)
|
const struct radv_ray_flags *ray_flags, bool break_on_terminate)
|
||||||
{
|
{
|
||||||
nir_ssa_def *terminate_on_first_hit =
|
nir_ssa_def *terminate_on_first_hit;
|
||||||
nir_test_mask(b, rq_load_var(b, index, vars->flags), SpvRayFlagsTerminateOnFirstHitKHRMask);
|
if (ray_flags)
|
||||||
|
terminate_on_first_hit = ray_flags->terminate_on_first_hit;
|
||||||
|
else
|
||||||
|
terminate_on_first_hit = nir_test_mask(b, rq_load_var(b, index, vars->flags),
|
||||||
|
SpvRayFlagsTerminateOnFirstHitKHRMask);
|
||||||
nir_push_if(b, terminate_on_first_hit);
|
nir_push_if(b, terminate_on_first_hit);
|
||||||
{
|
{
|
||||||
rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, false), 0x1);
|
rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, false), 0x1);
|
||||||
@@ -340,7 +344,7 @@ lower_rq_confirm_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic_
|
|||||||
struct ray_query_vars *vars)
|
struct ray_query_vars *vars)
|
||||||
{
|
{
|
||||||
copy_candidate_to_closest(b, index, vars);
|
copy_candidate_to_closest(b, index, vars);
|
||||||
insert_terminate_on_first_hit(b, index, vars, false);
|
insert_terminate_on_first_hit(b, index, vars, NULL, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -351,7 +355,7 @@ lower_rq_generate_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic
|
|||||||
nir_fge(b, instr->src[1].ssa, rq_load_var(b, index, vars->tmin))));
|
nir_fge(b, instr->src[1].ssa, rq_load_var(b, index, vars->tmin))));
|
||||||
{
|
{
|
||||||
copy_candidate_to_closest(b, index, vars);
|
copy_candidate_to_closest(b, index, vars);
|
||||||
insert_terminate_on_first_hit(b, index, vars, false);
|
insert_terminate_on_first_hit(b, index, vars, NULL, false);
|
||||||
rq_store_var(b, index, vars->closest.t, instr->src[1].ssa, 0x1);
|
rq_store_var(b, index, vars->closest.t, instr->src[1].ssa, 0x1);
|
||||||
}
|
}
|
||||||
nir_pop_if(b, NULL);
|
nir_pop_if(b, NULL);
|
||||||
@@ -604,7 +608,8 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection,
|
handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection,
|
||||||
const struct radv_ray_traversal_args *args)
|
const struct radv_ray_traversal_args *args,
|
||||||
|
const struct radv_ray_flags *ray_flags)
|
||||||
{
|
{
|
||||||
struct traversal_data *data = args->data;
|
struct traversal_data *data = args->data;
|
||||||
struct ray_query_vars *vars = data->vars;
|
struct ray_query_vars *vars = data->vars;
|
||||||
@@ -623,7 +628,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
|
|||||||
nir_push_if(b, intersection->base.opaque);
|
nir_push_if(b, intersection->base.opaque);
|
||||||
{
|
{
|
||||||
copy_candidate_to_closest(b, index, vars);
|
copy_candidate_to_closest(b, index, vars);
|
||||||
insert_terminate_on_first_hit(b, index, vars, true);
|
insert_terminate_on_first_hit(b, index, vars, ray_flags, true);
|
||||||
}
|
}
|
||||||
nir_push_else(b, NULL);
|
nir_push_else(b, NULL);
|
||||||
{
|
{
|
||||||
|
@@ -387,9 +387,9 @@ nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_de
|
|||||||
|
|
||||||
/* When a hit is opaque the any_hit shader is skipped for this hit and the hit
|
/* When a hit is opaque the any_hit shader is skipped for this hit and the hit
|
||||||
* is assumed to be an actual hit. */
|
* is assumed to be an actual hit. */
|
||||||
nir_ssa_def *
|
static nir_ssa_def *
|
||||||
hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, nir_ssa_def *flags,
|
hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags,
|
||||||
nir_ssa_def *geometry_id_and_flags)
|
const struct radv_ray_flags *ray_flags, nir_ssa_def *geometry_id_and_flags)
|
||||||
{
|
{
|
||||||
nir_ssa_def *geom_force_opaque =
|
nir_ssa_def *geom_force_opaque =
|
||||||
nir_test_mask(b, geometry_id_and_flags, VK_GEOMETRY_OPAQUE_BIT_KHR << 28);
|
nir_test_mask(b, geometry_id_and_flags, VK_GEOMETRY_OPAQUE_BIT_KHR << 28);
|
||||||
@@ -402,11 +402,8 @@ hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, nir_ssa_def *fl
|
|||||||
opaque = nir_bcsel(b, instance_force_opaque, nir_imm_bool(b, true), opaque);
|
opaque = nir_bcsel(b, instance_force_opaque, nir_imm_bool(b, true), opaque);
|
||||||
opaque = nir_bcsel(b, instance_force_non_opaque, nir_imm_bool(b, false), opaque);
|
opaque = nir_bcsel(b, instance_force_non_opaque, nir_imm_bool(b, false), opaque);
|
||||||
|
|
||||||
nir_ssa_def *ray_force_opaque = nir_test_mask(b, flags, SpvRayFlagsOpaqueKHRMask);
|
opaque = nir_bcsel(b, ray_flags->force_opaque, nir_imm_bool(b, true), opaque);
|
||||||
nir_ssa_def *ray_force_non_opaque = nir_test_mask(b, flags, SpvRayFlagsNoOpaqueKHRMask);
|
opaque = nir_bcsel(b, ray_flags->force_not_opaque, nir_imm_bool(b, false), opaque);
|
||||||
|
|
||||||
opaque = nir_bcsel(b, ray_force_opaque, nir_imm_bool(b, true), opaque);
|
|
||||||
opaque = nir_bcsel(b, ray_force_non_opaque, nir_imm_bool(b, false), opaque);
|
|
||||||
return opaque;
|
return opaque;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -424,7 +421,8 @@ create_bvh_descriptor(nir_builder *b)
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
insert_traversal_triangle_case(struct radv_device *device, nir_builder *b,
|
insert_traversal_triangle_case(struct radv_device *device, nir_builder *b,
|
||||||
const struct radv_ray_traversal_args *args, nir_ssa_def *result,
|
const struct radv_ray_traversal_args *args,
|
||||||
|
const struct radv_ray_flags *ray_flags, nir_ssa_def *result,
|
||||||
nir_ssa_def *bvh_node)
|
nir_ssa_def *bvh_node)
|
||||||
{
|
{
|
||||||
if (!args->triangle_cb)
|
if (!args->triangle_cb)
|
||||||
@@ -443,15 +441,9 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b,
|
|||||||
VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR << 24);
|
VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR << 24);
|
||||||
intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw);
|
intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw);
|
||||||
|
|
||||||
nir_ssa_def *not_cull =
|
nir_ssa_def *not_cull = ray_flags->no_skip_triangles;
|
||||||
nir_inot(b, nir_test_mask(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask));
|
|
||||||
nir_ssa_def *not_facing_cull =
|
nir_ssa_def *not_facing_cull =
|
||||||
nir_ieq_imm(b,
|
nir_bcsel(b, intersection.frontface, ray_flags->no_cull_front, ray_flags->no_cull_back);
|
||||||
nir_iand(b, args->flags,
|
|
||||||
nir_bcsel(b, intersection.frontface,
|
|
||||||
nir_imm_int(b, SpvRayFlagsCullFrontFacingTrianglesKHRMask),
|
|
||||||
nir_imm_int(b, SpvRayFlagsCullBackFacingTrianglesKHRMask))),
|
|
||||||
0);
|
|
||||||
|
|
||||||
not_cull = nir_iand(
|
not_cull = nir_iand(
|
||||||
b, not_cull,
|
b, not_cull,
|
||||||
@@ -471,22 +463,18 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b,
|
|||||||
intersection.base.primitive_id = nir_channel(b, triangle_info, 0);
|
intersection.base.primitive_id = nir_channel(b, triangle_info, 0);
|
||||||
intersection.base.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
|
intersection.base.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
|
||||||
intersection.base.opaque =
|
intersection.base.opaque =
|
||||||
hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), args->flags,
|
hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags,
|
||||||
intersection.base.geometry_id_and_flags);
|
intersection.base.geometry_id_and_flags);
|
||||||
|
|
||||||
not_cull = nir_ieq_imm(b,
|
not_cull = nir_bcsel(b, intersection.base.opaque, ray_flags->no_cull_opaque,
|
||||||
nir_iand(b, args->flags,
|
ray_flags->no_cull_no_opaque);
|
||||||
nir_bcsel(b, intersection.base.opaque,
|
|
||||||
nir_imm_int(b, SpvRayFlagsCullOpaqueKHRMask),
|
|
||||||
nir_imm_int(b, SpvRayFlagsCullNoOpaqueKHRMask))),
|
|
||||||
0);
|
|
||||||
nir_push_if(b, not_cull);
|
nir_push_if(b, not_cull);
|
||||||
{
|
{
|
||||||
nir_ssa_def *divs[2] = {div, div};
|
nir_ssa_def *divs[2] = {div, div};
|
||||||
intersection.barycentrics =
|
intersection.barycentrics =
|
||||||
nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2));
|
nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2));
|
||||||
|
|
||||||
args->triangle_cb(b, &intersection, args);
|
args->triangle_cb(b, &intersection, args, ray_flags);
|
||||||
}
|
}
|
||||||
nir_pop_if(b, NULL);
|
nir_pop_if(b, NULL);
|
||||||
}
|
}
|
||||||
@@ -497,7 +485,8 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b,
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
insert_traversal_aabb_case(struct radv_device *device, nir_builder *b,
|
insert_traversal_aabb_case(struct radv_device *device, nir_builder *b,
|
||||||
const struct radv_ray_traversal_args *args, nir_ssa_def *bvh_node)
|
const struct radv_ray_traversal_args *args,
|
||||||
|
const struct radv_ray_flags *ray_flags, nir_ssa_def *bvh_node)
|
||||||
{
|
{
|
||||||
if (!args->aabb_cb)
|
if (!args->aabb_cb)
|
||||||
return;
|
return;
|
||||||
@@ -509,18 +498,11 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b,
|
|||||||
intersection.primitive_id = nir_channel(b, triangle_info, 0);
|
intersection.primitive_id = nir_channel(b, triangle_info, 0);
|
||||||
intersection.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
|
intersection.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
|
||||||
intersection.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
|
intersection.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
|
||||||
args->flags, intersection.geometry_id_and_flags);
|
ray_flags, intersection.geometry_id_and_flags);
|
||||||
|
|
||||||
nir_ssa_def *not_skip_aabb =
|
nir_ssa_def *not_cull =
|
||||||
nir_inot(b, nir_test_mask(b, args->flags, SpvRayFlagsSkipAABBsKHRMask));
|
nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
|
||||||
nir_ssa_def *not_cull = nir_iand(
|
not_cull = nir_iand(b, not_cull, ray_flags->no_skip_aabbs);
|
||||||
b, not_skip_aabb,
|
|
||||||
nir_ieq_imm(
|
|
||||||
b,
|
|
||||||
nir_iand(b, args->flags,
|
|
||||||
nir_bcsel(b, intersection.opaque, nir_imm_int(b, SpvRayFlagsCullOpaqueKHRMask),
|
|
||||||
nir_imm_int(b, SpvRayFlagsCullNoOpaqueKHRMask))),
|
|
||||||
0));
|
|
||||||
nir_push_if(b, not_cull);
|
nir_push_if(b, not_cull);
|
||||||
{
|
{
|
||||||
args->aabb_cb(b, &intersection, args);
|
args->aabb_cb(b, &intersection, args);
|
||||||
@@ -548,6 +530,24 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
|
|||||||
nir_ssa_def *desc = create_bvh_descriptor(b);
|
nir_ssa_def *desc = create_bvh_descriptor(b);
|
||||||
nir_ssa_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);
|
nir_ssa_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);
|
||||||
|
|
||||||
|
struct radv_ray_flags ray_flags = {
|
||||||
|
.force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask),
|
||||||
|
.force_not_opaque = nir_test_mask(b, args->flags, SpvRayFlagsNoOpaqueKHRMask),
|
||||||
|
.terminate_on_first_hit =
|
||||||
|
nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask),
|
||||||
|
.no_cull_front = nir_ieq_imm(
|
||||||
|
b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0),
|
||||||
|
.no_cull_back = nir_ieq_imm(
|
||||||
|
b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0),
|
||||||
|
.no_cull_opaque =
|
||||||
|
nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0),
|
||||||
|
.no_cull_no_opaque =
|
||||||
|
nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0),
|
||||||
|
.no_skip_triangles =
|
||||||
|
nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask), 0),
|
||||||
|
.no_skip_aabbs =
|
||||||
|
nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0),
|
||||||
|
};
|
||||||
nir_push_loop(b);
|
nir_push_loop(b);
|
||||||
{
|
{
|
||||||
nir_push_if(
|
nir_push_if(
|
||||||
@@ -648,7 +648,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
|
|||||||
{
|
{
|
||||||
nir_push_if(b, nir_ieq_imm(b, node_type, radv_bvh_node_aabb));
|
nir_push_if(b, nir_ieq_imm(b, node_type, radv_bvh_node_aabb));
|
||||||
{
|
{
|
||||||
insert_traversal_aabb_case(device, b, args, global_bvh_node);
|
insert_traversal_aabb_case(device, b, args, &ray_flags, global_bvh_node);
|
||||||
}
|
}
|
||||||
nir_push_else(b, NULL);
|
nir_push_else(b, NULL);
|
||||||
{
|
{
|
||||||
@@ -761,7 +761,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
|
|||||||
nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
|
nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
|
||||||
nir_load_deref(b, args->vars.inv_dir));
|
nir_load_deref(b, args->vars.inv_dir));
|
||||||
}
|
}
|
||||||
insert_traversal_triangle_case(device, b, args, result, global_bvh_node);
|
insert_traversal_triangle_case(device, b, args, &ray_flags, result, global_bvh_node);
|
||||||
}
|
}
|
||||||
nir_pop_if(b, NULL);
|
nir_pop_if(b, NULL);
|
||||||
}
|
}
|
||||||
|
@@ -54,13 +54,22 @@ nir_ssa_def *nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_d
|
|||||||
|
|
||||||
void nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out);
|
void nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out);
|
||||||
|
|
||||||
nir_ssa_def *hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, nir_ssa_def *flags,
|
|
||||||
nir_ssa_def *geometry_id_and_flags);
|
|
||||||
|
|
||||||
nir_ssa_def *create_bvh_descriptor(nir_builder *b);
|
nir_ssa_def *create_bvh_descriptor(nir_builder *b);
|
||||||
|
|
||||||
struct radv_ray_traversal_args;
|
struct radv_ray_traversal_args;
|
||||||
|
|
||||||
|
struct radv_ray_flags {
|
||||||
|
nir_ssa_def *force_opaque;
|
||||||
|
nir_ssa_def *force_not_opaque;
|
||||||
|
nir_ssa_def *terminate_on_first_hit;
|
||||||
|
nir_ssa_def *no_cull_front;
|
||||||
|
nir_ssa_def *no_cull_back;
|
||||||
|
nir_ssa_def *no_cull_opaque;
|
||||||
|
nir_ssa_def *no_cull_no_opaque;
|
||||||
|
nir_ssa_def *no_skip_triangles;
|
||||||
|
nir_ssa_def *no_skip_aabbs;
|
||||||
|
};
|
||||||
|
|
||||||
struct radv_leaf_intersection {
|
struct radv_leaf_intersection {
|
||||||
nir_ssa_def *node_addr;
|
nir_ssa_def *node_addr;
|
||||||
nir_ssa_def *primitive_id;
|
nir_ssa_def *primitive_id;
|
||||||
@@ -82,7 +91,8 @@ struct radv_triangle_intersection {
|
|||||||
|
|
||||||
typedef void (*radv_triangle_intersection_cb)(nir_builder *b,
|
typedef void (*radv_triangle_intersection_cb)(nir_builder *b,
|
||||||
struct radv_triangle_intersection *intersection,
|
struct radv_triangle_intersection *intersection,
|
||||||
const struct radv_ray_traversal_args *args);
|
const struct radv_ray_traversal_args *args,
|
||||||
|
const struct radv_ray_flags *ray_flags);
|
||||||
|
|
||||||
typedef void (*radv_rt_stack_store_cb)(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value,
|
typedef void (*radv_rt_stack_store_cb)(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value,
|
||||||
const struct radv_ray_traversal_args *args);
|
const struct radv_ray_traversal_args *args);
|
||||||
|
@@ -999,7 +999,8 @@ struct traversal_data {
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection,
|
handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection,
|
||||||
const struct radv_ray_traversal_args *args)
|
const struct radv_ray_traversal_args *args,
|
||||||
|
const struct radv_ray_flags *ray_flags)
|
||||||
{
|
{
|
||||||
struct traversal_data *data = args->data;
|
struct traversal_data *data = args->data;
|
||||||
|
|
||||||
@@ -1056,10 +1057,8 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
|
|||||||
nir_store_var(b, data->vars->idx, sbt_idx, 1);
|
nir_store_var(b, data->vars->idx, sbt_idx, 1);
|
||||||
nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1);
|
nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1);
|
||||||
|
|
||||||
nir_ssa_def *terminate_on_first_hit =
|
|
||||||
nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask);
|
|
||||||
nir_ssa_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate);
|
nir_ssa_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate);
|
||||||
nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated));
|
nir_push_if(b, nir_ior(b, ray_flags->terminate_on_first_hit, ray_terminated));
|
||||||
{
|
{
|
||||||
nir_jump(b, nir_jump_break);
|
nir_jump(b, nir_jump_break);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user