From 8a3a0210aeb3f0a4d22b3926bdd1860b880836b0 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 19 Jul 2022 22:09:17 +0300 Subject: [PATCH] intel/nir/rt: store ray query state in scratch Initially I tried to store ray query state in the RT scratch space but got the offset wrong. In the end putting this in the scratch surface makes more sense, especially for non RT stages. Signed-off-by: Lionel Landwerlin Fixes: c78be5da300ae3 ("intel/fs: lower ray query intrinsics") Reviewed-by: Ivan Briano Part-of: (cherry picked from commit 838bbdcf2ef954830061218fcdb1800baa8855b6) --- .pick_status.json | 2 +- .../compiler/brw_nir_lower_ray_queries.c | 37 ++++++++++--------- .../compiler/brw_nir_lower_shader_calls.c | 2 +- src/intel/compiler/brw_nir_rt_builder.h | 1 - 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 4d0b01fb2c2..cbeef4f7605 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1030,7 +1030,7 @@ "description": "intel/nir/rt: store ray query state in scratch", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "c78be5da300ae386a12b91a22efb064335e2043a" }, diff --git a/src/intel/compiler/brw_nir_lower_ray_queries.c b/src/intel/compiler/brw_nir_lower_ray_queries.c index bdbcc975376..7cee1d6e3d3 100644 --- a/src/intel/compiler/brw_nir_lower_ray_queries.c +++ b/src/intel/compiler/brw_nir_lower_ray_queries.c @@ -36,6 +36,8 @@ struct lowering_state { struct brw_nir_rt_globals_defs globals; nir_ssa_def *rq_globals; + + uint32_t state_scratch_base_offset; }; struct brw_ray_query { @@ -43,6 +45,8 @@ struct brw_ray_query { uint32_t id; }; +#define SIZEOF_QUERY_STATE (sizeof(uint32_t)) + static bool need_spill_fill(struct lowering_state *state) { @@ -91,7 +95,7 @@ static nir_ssa_def * get_ray_query_shadow_addr(nir_builder *b, nir_deref_instr *deref, struct lowering_state *state, - nir_ssa_def **out_state_addr) + nir_ssa_def **out_state_offset) { nir_deref_path path; nir_deref_path_init(&path, deref, NULL); @@ -111,14 +115,8 @@ get_ray_query_shadow_addr(nir_builder *b, brw_rt_ray_queries_shadow_stack_size(state->devinfo) * rq->id); bool spill_fill = need_spill_fill(state); - *out_state_addr = - spill_fill ? - nir_iadd_imm(b, - state->globals.resume_sbt_addr, - brw_rt_ray_queries_shadow_stack_size(state->devinfo) * - b->shader->info.ray_queries + - 4 * rq->id) : - state->globals.resume_sbt_addr; + *out_state_offset = nir_imm_int(b, state->state_scratch_base_offset + + SIZEOF_QUERY_STATE * rq->id); if (!spill_fill) return NULL; @@ -130,11 +128,11 @@ get_ray_query_shadow_addr(nir_builder *b, nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1); /**/ - uint32_t local_state_offset = 4 * MAX2(1, glsl_get_aoa_size((*p)->type)); - *out_state_addr = - nir_iadd(b, *out_state_addr, - nir_i2i64(b, - nir_imul_imm(b, index, local_state_offset))); + uint32_t local_state_offset = SIZEOF_QUERY_STATE * + MAX2(1, glsl_get_aoa_size((*p)->type)); + *out_state_offset = + nir_iadd(b, *out_state_offset, + nir_imul_imm(b, index, local_state_offset)); /**/ uint64_t size = MAX2(1, glsl_get_aoa_size((*p)->type)) * @@ -168,13 +166,13 @@ get_ray_query_shadow_addr(nir_builder *b, static void update_trace_ctrl_level(nir_builder *b, - nir_ssa_def *state_addr, + nir_ssa_def *state_scratch_offset, nir_ssa_def **out_old_ctrl, nir_ssa_def **out_old_level, nir_ssa_def *new_ctrl, nir_ssa_def *new_level) { - nir_ssa_def *old_value = brw_nir_rt_load(b, state_addr, 4, 1, 32); + nir_ssa_def *old_value = nir_load_scratch(b, 1, 32, state_scratch_offset, 4); nir_ssa_def *old_ctrl = nir_ishr_imm(b, old_value, 2); nir_ssa_def *old_level = nir_iand_imm(b, old_value, 0x3); @@ -190,7 +188,7 @@ update_trace_ctrl_level(nir_builder *b, new_level = old_level; nir_ssa_def *new_value = nir_ior(b, nir_ishl_imm(b, new_ctrl, 2), new_level); - brw_nir_rt_store(b, state_addr, 4, new_value, 0x1); + nir_store_scratch(b, new_value, state_scratch_offset, 4, 0x1); } } @@ -540,9 +538,12 @@ brw_nir_lower_ray_queries(nir_shader *shader, maybe_create_brw_var(instr, &state); } - bool progress = _mesa_hash_table_num_entries(state.queries) > 0; + bool progress = state.n_queries > 0; if (progress) { + state.state_scratch_base_offset = shader->scratch_size; + shader->scratch_size += SIZEOF_QUERY_STATE * state.n_queries; + lower_ray_query_impl(impl, &state); nir_remove_dead_derefs(shader); diff --git a/src/intel/compiler/brw_nir_lower_shader_calls.c b/src/intel/compiler/brw_nir_lower_shader_calls.c index 90beab18dc9..19ef08e49ab 100644 --- a/src/intel/compiler/brw_nir_lower_shader_calls.c +++ b/src/intel/compiler/brw_nir_lower_shader_calls.c @@ -70,7 +70,7 @@ brw_nir_lower_shader_returns(nir_shader *shader) */ assert(no_load_scratch_base_ptr_intrinsic(shader)); if (shader->info.stage != MESA_SHADER_RAYGEN) - shader->scratch_size = BRW_BTD_STACK_CALLEE_DATA_SIZE; + shader->scratch_size += BRW_BTD_STACK_CALLEE_DATA_SIZE; nir_builder b; nir_builder_init(&b, impl); diff --git a/src/intel/compiler/brw_nir_rt_builder.h b/src/intel/compiler/brw_nir_rt_builder.h index 7495f4bb115..6743684b111 100644 --- a/src/intel/compiler/brw_nir_rt_builder.h +++ b/src/intel/compiler/brw_nir_rt_builder.h @@ -147,7 +147,6 @@ brw_nir_btd_retire(nir_builder *b) static inline void brw_nir_btd_return(struct nir_builder *b) { - assert(b->shader->scratch_size == BRW_BTD_STACK_CALLEE_DATA_SIZE); nir_ssa_def *resume_addr = brw_nir_rt_load_scratch(b, BRW_BTD_STACK_RESUME_BSR_ADDR_OFFSET, 8 /* align */, 1, 64);