intel/fs: make tcs input_vertices dynamic
We need to do 3 things to accomplish this : 1. make all the register access consider the maximal case when unknown at compile time 2. move the clamping of load_per_vertex_input prior to lowering nir_intrinsic_load_patch_vertices_in (in the dynamic cases, the clamping will use the nir_intrinsic_load_patch_vertices_in to clamp), meaning clamping using derefs rather than lowered nir_intrinsic_load_per_vertex_input 3. in the known cases, lower nir_intrinsic_load_patch_vertices_in in NIR (so that the clamped elements still be vectorized to the smallest number of URB read messages) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Emma Anholt <emma@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22378>
This commit is contained in:

committed by
Marge Bot

parent
32b7659fff
commit
429ef02f83
@@ -322,6 +322,7 @@ struct brw_tcs_prog_key
|
|||||||
|
|
||||||
enum tess_primitive_mode _tes_primitive_mode;
|
enum tess_primitive_mode _tes_primitive_mode;
|
||||||
|
|
||||||
|
/** Number of input vertices, 0 means dynamic */
|
||||||
unsigned input_vertices;
|
unsigned input_vertices;
|
||||||
|
|
||||||
/** A bitfield of per-patch outputs written. */
|
/** A bitfield of per-patch outputs written. */
|
||||||
@@ -331,6 +332,15 @@ struct brw_tcs_prog_key
|
|||||||
uint32_t padding:24;
|
uint32_t padding:24;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define BRW_MAX_TCS_INPUT_VERTICES (32)
|
||||||
|
|
||||||
|
static inline uint32_t
|
||||||
|
brw_tcs_prog_key_input_vertices(const struct brw_tcs_prog_key *key)
|
||||||
|
{
|
||||||
|
return key->input_vertices != 0 ?
|
||||||
|
key->input_vertices : BRW_MAX_TCS_INPUT_VERTICES;
|
||||||
|
}
|
||||||
|
|
||||||
/** The program key for Tessellation Evaluation Shaders. */
|
/** The program key for Tessellation Evaluation Shaders. */
|
||||||
struct brw_tes_prog_key
|
struct brw_tes_prog_key
|
||||||
{
|
{
|
||||||
|
@@ -2665,7 +2665,8 @@ fs_visitor::get_tcs_multi_patch_icp_handle(const fs_builder &bld,
|
|||||||
* we might read up to nir->info.gs.vertices_in registers.
|
* we might read up to nir->info.gs.vertices_in registers.
|
||||||
*/
|
*/
|
||||||
bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, start,
|
bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, start,
|
||||||
icp_offset_bytes, brw_imm_ud(tcs_key->input_vertices * REG_SIZE));
|
icp_offset_bytes,
|
||||||
|
brw_imm_ud(brw_tcs_prog_key_input_vertices(tcs_key) * REG_SIZE));
|
||||||
|
|
||||||
return icp_handle;
|
return icp_handle;
|
||||||
}
|
}
|
||||||
@@ -2675,7 +2676,6 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||||||
nir_intrinsic_instr *instr)
|
nir_intrinsic_instr *instr)
|
||||||
{
|
{
|
||||||
assert(stage == MESA_SHADER_TESS_CTRL);
|
assert(stage == MESA_SHADER_TESS_CTRL);
|
||||||
struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key;
|
|
||||||
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
|
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
|
||||||
struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
|
struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
|
||||||
|
|
||||||
@@ -2690,10 +2690,6 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||||||
case nir_intrinsic_load_invocation_id:
|
case nir_intrinsic_load_invocation_id:
|
||||||
bld.MOV(retype(dst, invocation_id.type), invocation_id);
|
bld.MOV(retype(dst, invocation_id.type), invocation_id);
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_load_patch_vertices_in:
|
|
||||||
bld.MOV(retype(dst, BRW_REGISTER_TYPE_D),
|
|
||||||
brw_imm_d(tcs_key->input_vertices));
|
|
||||||
break;
|
|
||||||
|
|
||||||
case nir_intrinsic_scoped_barrier:
|
case nir_intrinsic_scoped_barrier:
|
||||||
if (nir_intrinsic_memory_scope(instr) != NIR_SCOPE_NONE)
|
if (nir_intrinsic_memory_scope(instr) != NIR_SCOPE_NONE)
|
||||||
|
@@ -48,7 +48,7 @@ tcs_thread_payload::tcs_thread_payload(const fs_visitor &v)
|
|||||||
num_regs = 5;
|
num_regs = 5;
|
||||||
} else {
|
} else {
|
||||||
assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_MULTI_PATCH);
|
assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_MULTI_PATCH);
|
||||||
assert(tcs_key->input_vertices > 0);
|
assert(tcs_key->input_vertices <= BRW_MAX_TCS_INPUT_VERTICES);
|
||||||
|
|
||||||
patch_urb_output = brw_ud8_grf(1, 0);
|
patch_urb_output = brw_ud8_grf(1, 0);
|
||||||
|
|
||||||
@@ -59,7 +59,7 @@ tcs_thread_payload::tcs_thread_payload(const fs_visitor &v)
|
|||||||
|
|
||||||
/* ICP handles occupy the next 1-32 registers. */
|
/* ICP handles occupy the next 1-32 registers. */
|
||||||
icp_handle_start = brw_ud8_grf(r, 0);
|
icp_handle_start = brw_ud8_grf(r, 0);
|
||||||
r += tcs_key->input_vertices;
|
r += brw_tcs_prog_key_input_vertices(tcs_key);
|
||||||
|
|
||||||
num_regs = r;
|
num_regs = r;
|
||||||
}
|
}
|
||||||
|
@@ -1049,6 +1049,14 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
|
|||||||
nir_var_mem_ubo | nir_var_mem_ssbo,
|
nir_var_mem_ubo | nir_var_mem_ssbo,
|
||||||
nir_lower_direct_array_deref_of_vec_load);
|
nir_lower_direct_array_deref_of_vec_load);
|
||||||
|
|
||||||
|
/* Clamp load_per_vertex_input of the TCS stage so that we do not generate
|
||||||
|
* loads reading out of bounds. We can do this here because we called
|
||||||
|
* nir_lower_system_values above.
|
||||||
|
*/
|
||||||
|
if (nir->info.stage == MESA_SHADER_TESS_CTRL &&
|
||||||
|
compiler->use_tcs_multi_patch)
|
||||||
|
OPT(brw_nir_clamp_per_vertex_loads);
|
||||||
|
|
||||||
/* Get rid of split copies */
|
/* Get rid of split copies */
|
||||||
brw_nir_optimize(nir, compiler);
|
brw_nir_optimize(nir, compiler);
|
||||||
}
|
}
|
||||||
@@ -1985,6 +1993,8 @@ nir_shader *
|
|||||||
brw_nir_create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler,
|
brw_nir_create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler,
|
||||||
const struct brw_tcs_prog_key *key)
|
const struct brw_tcs_prog_key *key)
|
||||||
{
|
{
|
||||||
|
assert(key->input_vertices > 0);
|
||||||
|
|
||||||
const nir_shader_compiler_options *options =
|
const nir_shader_compiler_options *options =
|
||||||
compiler->nir_options[MESA_SHADER_TESS_CTRL];
|
compiler->nir_options[MESA_SHADER_TESS_CTRL];
|
||||||
|
|
||||||
|
@@ -98,6 +98,9 @@ struct brw_nir_compiler_opts {
|
|||||||
|
|
||||||
/* Whether robust image access is enabled */
|
/* Whether robust image access is enabled */
|
||||||
bool robust_image_access;
|
bool robust_image_access;
|
||||||
|
|
||||||
|
/* Input vertices for TCS stage (0 means dynamic) */
|
||||||
|
unsigned input_vertices;
|
||||||
};
|
};
|
||||||
|
|
||||||
void brw_preprocess_nir(const struct brw_compiler *compiler,
|
void brw_preprocess_nir(const struct brw_compiler *compiler,
|
||||||
@@ -191,8 +194,9 @@ bool brw_nir_opt_peephole_ffma(nir_shader *shader);
|
|||||||
|
|
||||||
bool brw_nir_opt_peephole_imul32x16(nir_shader *shader);
|
bool brw_nir_opt_peephole_imul32x16(nir_shader *shader);
|
||||||
|
|
||||||
bool brw_nir_clamp_per_vertex_loads(nir_shader *shader,
|
bool brw_nir_clamp_per_vertex_loads(nir_shader *shader);
|
||||||
unsigned input_vertices);
|
|
||||||
|
bool brw_nir_lower_patch_vertices_in(nir_shader *shader, unsigned input_vertices);
|
||||||
|
|
||||||
bool brw_nir_blockify_uniform_loads(nir_shader *shader,
|
bool brw_nir_blockify_uniform_loads(nir_shader *shader,
|
||||||
const struct intel_device_info *devinfo);
|
const struct intel_device_info *devinfo);
|
||||||
|
@@ -24,38 +24,93 @@
|
|||||||
/*
|
/*
|
||||||
* Limit input per vertex input accesses. This is useful for the tesselation stages.
|
* Limit input per vertex input accesses. This is useful for the tesselation stages.
|
||||||
* On Gfx12.5+ out of bound accesses generate hangs.
|
* On Gfx12.5+ out of bound accesses generate hangs.
|
||||||
|
*
|
||||||
|
* This pass operates on derefs, it must be called before shader inputs are
|
||||||
|
* lowered.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "brw_nir.h"
|
#include "brw_nir.h"
|
||||||
#include "compiler/nir/nir_builder.h"
|
#include "compiler/nir/nir_builder.h"
|
||||||
|
#include "compiler/nir/nir_deref.h"
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
lower_instr(nir_builder *b, nir_instr *instr, void *cb_data)
|
clamp_per_vertex_loads_instr(nir_builder *b, nir_instr *instr, void *cb_data)
|
||||||
{
|
{
|
||||||
unsigned *input_vertices = cb_data;
|
|
||||||
|
|
||||||
if (instr->type != nir_instr_type_intrinsic)
|
if (instr->type != nir_instr_type_intrinsic)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||||
if (intrin->intrinsic != nir_intrinsic_load_per_vertex_input)
|
if (intrin->intrinsic != nir_intrinsic_load_deref)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
nir_deref_instr *deref = nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
|
||||||
|
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||||
|
if (var == NULL || (var->data.mode & nir_var_shader_in) == 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
nir_deref_path path;
|
||||||
|
nir_deref_path_init(&path, deref, cb_data);
|
||||||
|
|
||||||
|
bool progress = false;
|
||||||
|
for (unsigned i = 0; path.path[i]; i++) {
|
||||||
|
if (path.path[i]->deref_type != nir_deref_type_array)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
b->cursor = nir_before_instr(&path.path[i]->instr);
|
||||||
|
|
||||||
|
nir_instr_rewrite_src_ssa(&path.path[i]->instr,
|
||||||
|
&path.path[i]->arr.index,
|
||||||
|
nir_umin(b,
|
||||||
|
path.path[i]->arr.index.ssa,
|
||||||
|
nir_iadd_imm(b, nir_load_patch_vertices_in(b), -1)));
|
||||||
|
|
||||||
|
progress = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
nir_deref_path_finish(&path);
|
||||||
|
|
||||||
|
return progress;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
brw_nir_clamp_per_vertex_loads(nir_shader *shader)
|
||||||
|
{
|
||||||
|
void *mem_ctx = ralloc_context(NULL);
|
||||||
|
|
||||||
|
bool ret = nir_shader_instructions_pass(shader, clamp_per_vertex_loads_instr,
|
||||||
|
nir_metadata_block_index |
|
||||||
|
nir_metadata_dominance,
|
||||||
|
mem_ctx);
|
||||||
|
|
||||||
|
ralloc_free(mem_ctx);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
lower_patch_vertices_instr(nir_builder *b, nir_instr *instr, void *cb_data)
|
||||||
|
{
|
||||||
|
if (instr->type != nir_instr_type_intrinsic)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||||
|
if (intrin->intrinsic != nir_intrinsic_load_patch_vertices_in)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
unsigned *input_vertices = cb_data;
|
||||||
|
|
||||||
b->cursor = nir_before_instr(instr);
|
b->cursor = nir_before_instr(instr);
|
||||||
|
|
||||||
nir_instr_rewrite_src_ssa(instr,
|
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_imm_int(b, *input_vertices));
|
||||||
&intrin->src[0],
|
|
||||||
nir_imin(b, intrin->src[0].ssa,
|
|
||||||
nir_imm_int(b, *input_vertices - 1)));
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
brw_nir_clamp_per_vertex_loads(nir_shader *shader,
|
brw_nir_lower_patch_vertices_in(nir_shader *shader, unsigned input_vertices)
|
||||||
unsigned input_vertices)
|
|
||||||
{
|
{
|
||||||
return nir_shader_instructions_pass(shader, lower_instr,
|
return nir_shader_instructions_pass(shader, lower_patch_vertices_instr,
|
||||||
nir_metadata_block_index |
|
nir_metadata_block_index |
|
||||||
nir_metadata_dominance,
|
nir_metadata_dominance,
|
||||||
&input_vertices);
|
&input_vertices);
|
||||||
|
@@ -385,8 +385,8 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
|||||||
key->_tes_primitive_mode);
|
key->_tes_primitive_mode);
|
||||||
if (key->quads_workaround)
|
if (key->quads_workaround)
|
||||||
brw_nir_apply_tcs_quads_workaround(nir);
|
brw_nir_apply_tcs_quads_workaround(nir);
|
||||||
if (compiler->use_tcs_multi_patch)
|
if (key->input_vertices > 0)
|
||||||
brw_nir_clamp_per_vertex_loads(nir, key->input_vertices);
|
brw_nir_lower_patch_vertices_in(nir, key->input_vertices);
|
||||||
|
|
||||||
brw_postprocess_nir(nir, compiler, debug_enabled,
|
brw_postprocess_nir(nir, compiler, debug_enabled,
|
||||||
key->base.robust_buffer_access);
|
key->base.robust_buffer_access);
|
||||||
|
Reference in New Issue
Block a user