From ac01f09d57b47e9bc6e27da122a5bb0ab43f6908 Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Wed, 19 Oct 2022 12:05:33 +0200 Subject: [PATCH] radv/rt: Load instance id and custom index on demand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stats for Quake II RTX: 57fps -> 57fps Totals from 7 (14.00% of 50) affected shaders: VGPRs: 800 -> 784 (-2.00%) CodeSize: 217868 -> 218308 (+0.20%) MaxWaves: 62 -> 63 (+1.61%) Instrs: 40384 -> 40420 (+0.09%); split: -0.01%, +0.10% Latency: 866315 -> 870692 (+0.51%) InvThroughput: 199189 -> 196595 (-1.30%); split: -1.75%, +0.45% VClause: 1058 -> 1077 (+1.80%) SClause: 1126 -> 1130 (+0.36%) Copies: 5787 -> 5772 (-0.26%); split: -0.40%, +0.14% PreVGPRs: 764 -> 750 (-1.83%) Reviewed-by: Daniel Schürmann Part-of: --- src/amd/vulkan/radv_nir_lower_ray_queries.c | 39 ++++++++--------- src/amd/vulkan/radv_pipeline_rt.c | 48 +++++---------------- src/amd/vulkan/radv_rt_common.c | 7 --- src/amd/vulkan/radv_rt_common.h | 2 - 4 files changed, 30 insertions(+), 66 deletions(-) diff --git a/src/amd/vulkan/radv_nir_lower_ray_queries.c b/src/amd/vulkan/radv_nir_lower_ray_queries.c index 2177c936cf9..06645ecc5a7 100644 --- a/src/amd/vulkan/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/radv_nir_lower_ray_queries.c @@ -155,12 +155,10 @@ struct ray_query_traversal_vars { struct ray_query_intersection_vars { rq_variable *primitive_id; rq_variable *geometry_id_and_flags; - rq_variable *instance_id; rq_variable *instance_addr; rq_variable *intersection_type; rq_variable *opaque; rq_variable *frontface; - rq_variable *custom_instance_and_mask; rq_variable *sbt_offset_and_flags; rq_variable *barycentrics; rq_variable *t; @@ -231,8 +229,6 @@ init_ray_query_intersection_vars(nir_shader *shader, nir_function_impl *impl, un rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_primitive_id")); result.geometry_id_and_flags = rq_variable_create(shader, impl, array_length, 
glsl_uint_type(), VAR_NAME("_geometry_id_and_flags")); - result.instance_id = - rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_instance_id")); result.instance_addr = rq_variable_create(shader, impl, array_length, glsl_uint64_t_type(), VAR_NAME("_instance_addr")); result.intersection_type = rq_variable_create(shader, impl, array_length, glsl_uint_type(), @@ -241,8 +237,6 @@ init_ray_query_intersection_vars(nir_shader *shader, nir_function_impl *impl, un rq_variable_create(shader, impl, array_length, glsl_bool_type(), VAR_NAME("_opaque")); result.frontface = rq_variable_create(shader, impl, array_length, glsl_bool_type(), VAR_NAME("_frontface")); - result.custom_instance_and_mask = rq_variable_create( - shader, impl, array_length, glsl_uint_type(), VAR_NAME("_custom_instance_and_mask")); result.sbt_offset_and_flags = rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_sbt_offset_and_flags")); result.barycentrics = @@ -307,12 +301,9 @@ static void copy_candidate_to_closest(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars) { rq_copy_var(b, index, vars->closest.barycentrics, vars->candidate.barycentrics, 0x3); - rq_copy_var(b, index, vars->closest.custom_instance_and_mask, - vars->candidate.custom_instance_and_mask, 0x1); rq_copy_var(b, index, vars->closest.geometry_id_and_flags, vars->candidate.geometry_id_and_flags, 0x1); rq_copy_var(b, index, vars->closest.instance_addr, vars->candidate.instance_addr, 0x1); - rq_copy_var(b, index, vars->closest.instance_id, vars->candidate.instance_id, 0x1); rq_copy_var(b, index, vars->closest.intersection_type, vars->candidate.intersection_type, 0x1); rq_copy_var(b, index, vars->closest.opaque, vars->candidate.opaque, 0x1); rq_copy_var(b, index, vars->closest.frontface, vars->candidate.frontface, 0x1); @@ -443,15 +434,25 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars, nir_bcsel(b, committed, rq_load_var(b, index, 
vars->closest.geometry_id_and_flags), rq_load_var(b, index, vars->candidate.geometry_id_and_flags)), 0xFFFFFF); - case nir_ray_query_value_intersection_instance_custom_index: - return nir_iand_imm( - b, - nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.custom_instance_and_mask), - rq_load_var(b, index, vars->candidate.custom_instance_and_mask)), - 0xFFFFFF); - case nir_ray_query_value_intersection_instance_id: - return nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.instance_id), - rq_load_var(b, index, vars->candidate.instance_id)); + case nir_ray_query_value_intersection_instance_custom_index: { + nir_ssa_def *instance_node_addr = + nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.instance_addr), + rq_load_var(b, index, vars->candidate.instance_addr)); + return nir_iand_imm(b, + nir_build_load_global(b, 1, 32, + nir_iadd_imm(b, instance_node_addr, + offsetof(struct radv_bvh_instance_node, + custom_instance_and_mask))), + 0xFFFFFF); + } + case nir_ray_query_value_intersection_instance_id: { + nir_ssa_def *instance_node_addr = + nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.instance_addr), + rq_load_var(b, index, vars->candidate.instance_addr)); + return nir_build_load_global( + b, 1, 32, + nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id))); + } case nir_ray_query_value_intersection_instance_sbt_index: return nir_iand_imm( b, @@ -647,9 +648,7 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars .previous_node = rq_deref_var(b, index, vars->trav.previous_node), .instance_top_node = rq_deref_var(b, index, vars->trav.instance_top_node), .instance_bottom_node = rq_deref_var(b, index, vars->trav.instance_bottom_node), - .instance_id = rq_deref_var(b, index, vars->candidate.instance_id), .instance_addr = rq_deref_var(b, index, vars->candidate.instance_addr), - .custom_instance_and_mask = rq_deref_var(b, index, vars->candidate.custom_instance_and_mask), 
.sbt_offset_and_flags = rq_deref_var(b, index, vars->candidate.sbt_offset_and_flags), }; diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index 9ceb4ddc9ed..23dd7100aa2 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -217,13 +217,9 @@ struct rt_variables { nir_variable *direction; nir_variable *tmax; - /* from the BTAS instance currently being visited */ - nir_variable *custom_instance_and_mask; - /* Properties of the primitive currently being visited. */ nir_variable *primitive_id; nir_variable *geometry_id_and_flags; - nir_variable *instance_id; nir_variable *instance_addr; nir_variable *hit_kind; nir_variable *opaque; @@ -282,14 +278,10 @@ create_rt_variables(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR vars.direction = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_direction"); vars.tmax = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmax"); - vars.custom_instance_and_mask = nir_variable_create( - shader, nir_var_shader_temp, glsl_uint_type(), "custom_instance_and_mask"); vars.primitive_id = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "primitive_id"); vars.geometry_id_and_flags = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "geometry_id_and_flags"); - vars.instance_id = - nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "instance_id"); vars.instance_addr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr"); vars.hit_kind = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "hit_kind"); @@ -329,10 +321,8 @@ map_rt_variables(struct hash_table *var_remap, struct rt_variables *src, _mesa_hash_table_insert(var_remap, src->direction, dst->direction); _mesa_hash_table_insert(var_remap, src->tmax, dst->tmax); - _mesa_hash_table_insert(var_remap, src->custom_instance_and_mask, dst->custom_instance_and_mask); 
_mesa_hash_table_insert(var_remap, src->primitive_id, dst->primitive_id); _mesa_hash_table_insert(var_remap, src->geometry_id_and_flags, dst->geometry_id_and_flags); - _mesa_hash_table_insert(var_remap, src->instance_id, dst->instance_id); _mesa_hash_table_insert(var_remap, src->instance_addr, dst->instance_addr); _mesa_hash_table_insert(var_remap, src->hit_kind, dst->hit_kind); _mesa_hash_table_insert(var_remap, src->opaque, dst->opaque); @@ -363,14 +353,10 @@ create_inner_vars(nir_builder *b, const struct rt_variables *vars) b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_geometry_id_and_flags"); inner_vars.tmax = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "inner_tmax"); - inner_vars.instance_id = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_instance_id"); inner_vars.instance_addr = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_instance_addr"); inner_vars.hit_kind = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_hit_kind"); - inner_vars.custom_instance_and_mask = nir_variable_create( - b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_custom_instance_and_mask"); return inner_vars; } @@ -573,8 +559,12 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca break; } case nir_intrinsic_load_ray_instance_custom_index: { - ret = nir_load_var(&b_shader, vars->custom_instance_and_mask); - ret = nir_iand_imm(&b_shader, ret, 0xFFFFFF); + nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); + nir_ssa_def *custom_instance_and_mask = nir_build_load_global( + &b_shader, 1, 32, + nir_iadd_imm(&b_shader, instance_node_addr, + offsetof(struct radv_bvh_instance_node, custom_instance_and_mask))); + ret = nir_iand_imm(&b_shader, custom_instance_and_mask, 0xFFFFFF); break; } case nir_intrinsic_load_primitive_id: { @@ -587,7 +577,11 @@ lower_rt_instructions(nir_shader *shader, struct 
rt_variables *vars, unsigned ca break; } case nir_intrinsic_load_instance_id: { - ret = nir_load_var(&b_shader, vars->instance_id); + nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); + ret = nir_build_load_global( + &b_shader, 1, 32, + nir_iadd_imm(&b_shader, instance_node_addr, + offsetof(struct radv_bvh_instance_node, instance_id))); break; } case nir_intrinsic_load_ray_flags: { @@ -1027,8 +1021,6 @@ struct rt_traversal_vars { nir_variable *dir; nir_variable *inv_dir; nir_variable *sbt_offset_and_flags; - nir_variable *instance_id; - nir_variable *custom_instance_and_mask; nir_variable *instance_addr; nir_variable *hit; nir_variable *bvh_base; @@ -1053,10 +1045,6 @@ init_traversal_vars(nir_builder *b) nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_inv_dir"); ret.sbt_offset_and_flags = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_sbt_offset_and_flags"); - ret.instance_id = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), - "traversal_instance_id"); - ret.custom_instance_and_mask = nir_variable_create( - b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_custom_instance_and_mask"); ret.instance_addr = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr"); ret.hit = nir_variable_create(b->shader, nir_var_shader_temp, glsl_bool_type(), "traversal_hit"); @@ -1149,12 +1137,9 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int nir_store_var(b, inner_vars.geometry_id_and_flags, intersection->base.geometry_id_and_flags, 1); nir_store_var(b, inner_vars.tmax, intersection->t, 0x1); - nir_store_var(b, inner_vars.instance_id, nir_load_var(b, data->trav_vars->instance_id), 0x1); nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, data->trav_vars->instance_addr), 0x1); nir_store_var(b, inner_vars.hit_kind, hit_kind, 0x1); - nir_store_var(b, inner_vars.custom_instance_and_mask, - 
nir_load_var(b, data->trav_vars->custom_instance_and_mask), 0x1); load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, 4); @@ -1171,12 +1156,9 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int nir_store_var(b, data->vars->primitive_id, intersection->base.primitive_id, 1); nir_store_var(b, data->vars->geometry_id_and_flags, intersection->base.geometry_id_and_flags, 1); nir_store_var(b, data->vars->tmax, intersection->t, 0x1); - nir_store_var(b, data->vars->instance_id, nir_load_var(b, data->trav_vars->instance_id), 0x1); nir_store_var(b, data->vars->instance_addr, nir_load_var(b, data->trav_vars->instance_addr), 0x1); nir_store_var(b, data->vars->hit_kind, hit_kind, 0x1); - nir_store_var(b, data->vars->custom_instance_and_mask, - nir_load_var(b, data->trav_vars->custom_instance_and_mask), 0x1); nir_store_var(b, data->vars->idx, sbt_idx, 1); nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1); @@ -1213,10 +1195,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio nir_store_var(b, inner_vars.primitive_id, intersection->primitive_id, 1); nir_store_var(b, inner_vars.geometry_id_and_flags, intersection->geometry_id_and_flags, 1); nir_store_var(b, inner_vars.tmax, nir_load_var(b, data->vars->tmax), 0x1); - nir_store_var(b, inner_vars.instance_id, nir_load_var(b, data->trav_vars->instance_id), 0x1); nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, data->trav_vars->instance_addr), 0x1); - nir_store_var(b, inner_vars.custom_instance_and_mask, - nir_load_var(b, data->trav_vars->custom_instance_and_mask), 0x1); nir_store_var(b, inner_vars.opaque, intersection->opaque, 1); load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, 4); @@ -1301,11 +1280,8 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio nir_store_var(b, data->vars->primitive_id, intersection->primitive_id, 1); nir_store_var(b, data->vars->geometry_id_and_flags, intersection->geometry_id_and_flags, 1); 
nir_store_var(b, data->vars->tmax, nir_load_var(b, inner_vars.tmax), 0x1); - nir_store_var(b, data->vars->instance_id, nir_load_var(b, data->trav_vars->instance_id), 0x1); nir_store_var(b, data->vars->instance_addr, nir_load_var(b, data->trav_vars->instance_addr), 0x1); - nir_store_var(b, data->vars->custom_instance_and_mask, - nir_load_var(b, data->trav_vars->custom_instance_and_mask), 0x1); nir_store_var(b, data->vars->idx, sbt_idx, 1); nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1); @@ -1397,9 +1373,7 @@ build_traversal_shader(struct radv_device *device, .previous_node = nir_build_deref_var(&b, trav_vars.previous_node), .instance_top_node = nir_build_deref_var(&b, trav_vars.instance_top_node), .instance_bottom_node = nir_build_deref_var(&b, trav_vars.instance_bottom_node), - .instance_id = nir_build_deref_var(&b, trav_vars.instance_id), .instance_addr = nir_build_deref_var(&b, trav_vars.instance_addr), - .custom_instance_and_mask = nir_build_deref_var(&b, trav_vars.custom_instance_and_mask), .sbt_offset_and_flags = nir_build_deref_var(&b, trav_vars.sbt_offset_and_flags), }; diff --git a/src/amd/vulkan/radv_rt_common.c b/src/amd/vulkan/radv_rt_common.c index b6e33d8bc8b..5fe2233ecef 100644 --- a/src/amd/vulkan/radv_rt_common.c +++ b/src/amd/vulkan/radv_rt_common.c @@ -666,11 +666,6 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, nir_ssa_def *wto_matrix[3]; nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix); - nir_ssa_def *instance_id = nir_build_load_global( - b, 1, 32, - nir_iadd_imm(b, instance_node_addr, - offsetof(struct radv_bvh_instance_node, instance_id))); - nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1); nir_store_deref(b, args->vars.bvh_base, @@ -695,8 +690,6 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1); - nir_store_deref(b, args->vars.custom_instance_and_mask, 
instance_and_mask, 1); - nir_store_deref(b, args->vars.instance_id, instance_id, 1); nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1); } nir_pop_if(b, NULL); diff --git a/src/amd/vulkan/radv_rt_common.h b/src/amd/vulkan/radv_rt_common.h index accf7c03690..e90a364bbd4 100644 --- a/src/amd/vulkan/radv_rt_common.h +++ b/src/amd/vulkan/radv_rt_common.h @@ -123,9 +123,7 @@ struct radv_ray_traversal_vars { nir_deref_instr *instance_bottom_node; /* Information about the current instance used for culling. */ - nir_deref_instr *instance_id; nir_deref_instr *instance_addr; - nir_deref_instr *custom_instance_and_mask; nir_deref_instr *sbt_offset_and_flags; };