intel: Use common helpers for TCS passthrough shaders
Rob added these new helpers a while back, which freedreno and radeonsi both share. We should use them too.

The new helpers use variables and system value intrinsics, so we can drop the explicit binding table creation and just use the normal paths.

Because we have to rewrite the system value uploading anyway, we drop the scrambling of the default tessellation levels on upload, and instead let the compiler go ahead and remap components like any normal shader. In theory, this results in more shuffling in the shader. In practice, we already do MOVs for message setup. In the passthrough shaders I looked at, this resulted in no extra instructions on Icelake (SIMD8 SINGLE_PATCH) and Tigerlake (8_PATCH). On Haswell, one shader grew by a single instruction for a pittance of cycles in a stage that isn't a performance bottleneck anyway. Avoiding remapping wasn't so much of an optimization as just the way that I originally wrote it. Not worth it.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20809>
This commit is contained in:

committed by
Marge Bot

parent
3a9edfc494
commit
96ba0344db
@@ -449,6 +449,8 @@ crocus_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
|
|||||||
unsigned num_system_values = 0;
|
unsigned num_system_values = 0;
|
||||||
|
|
||||||
unsigned patch_vert_idx = -1;
|
unsigned patch_vert_idx = -1;
|
||||||
|
unsigned tess_outer_default_idx = -1;
|
||||||
|
unsigned tess_inner_default_idx = -1;
|
||||||
unsigned ucp_idx[CROCUS_MAX_CLIP_PLANES];
|
unsigned ucp_idx[CROCUS_MAX_CLIP_PLANES];
|
||||||
unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
|
unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
|
||||||
unsigned variable_group_size_idx = -1;
|
unsigned variable_group_size_idx = -1;
|
||||||
@@ -539,6 +541,36 @@ crocus_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
|
|||||||
b.cursor = nir_before_instr(instr);
|
b.cursor = nir_before_instr(instr);
|
||||||
offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
|
offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
|
||||||
break;
|
break;
|
||||||
|
case nir_intrinsic_load_tess_level_outer_default:
|
||||||
|
if (tess_outer_default_idx == -1) {
|
||||||
|
tess_outer_default_idx = num_system_values;
|
||||||
|
num_system_values += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
system_values[tess_outer_default_idx + i] =
|
||||||
|
BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
|
||||||
|
}
|
||||||
|
|
||||||
|
b.cursor = nir_before_instr(instr);
|
||||||
|
offset =
|
||||||
|
nir_imm_int(&b, tess_outer_default_idx * sizeof(uint32_t));
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_load_tess_level_inner_default:
|
||||||
|
if (tess_inner_default_idx == -1) {
|
||||||
|
tess_inner_default_idx = num_system_values;
|
||||||
|
num_system_values += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
system_values[tess_inner_default_idx + i] =
|
||||||
|
BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X + i;
|
||||||
|
}
|
||||||
|
|
||||||
|
b.cursor = nir_before_instr(instr);
|
||||||
|
offset =
|
||||||
|
nir_imm_int(&b, tess_inner_default_idx * sizeof(uint32_t));
|
||||||
|
break;
|
||||||
case nir_intrinsic_image_deref_load_param_intel: {
|
case nir_intrinsic_image_deref_load_param_intel: {
|
||||||
assert(devinfo->ver < 9);
|
assert(devinfo->ver < 9);
|
||||||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||||
@@ -1420,52 +1452,19 @@ crocus_compile_tcs(struct crocus_context *ice,
|
|||||||
|
|
||||||
if (ish) {
|
if (ish) {
|
||||||
nir = nir_shader_clone(mem_ctx, ish->nir);
|
nir = nir_shader_clone(mem_ctx, ish->nir);
|
||||||
|
|
||||||
crocus_setup_uniforms(devinfo, mem_ctx, nir, prog_data, &system_values,
|
|
||||||
&num_system_values, &num_cbufs);
|
|
||||||
|
|
||||||
crocus_lower_swizzles(nir, &key->base.tex);
|
|
||||||
crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
|
|
||||||
num_system_values, num_cbufs, &key->base.tex);
|
|
||||||
if (can_push_ubo(devinfo))
|
|
||||||
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
|
|
||||||
} else {
|
} else {
|
||||||
nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, key);
|
nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, key);
|
||||||
|
|
||||||
/* Reserve space for passing the default tess levels as constants. */
|
|
||||||
num_cbufs = 1;
|
|
||||||
num_system_values = 8;
|
|
||||||
system_values =
|
|
||||||
rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
|
|
||||||
prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
|
|
||||||
prog_data->nr_params = num_system_values;
|
|
||||||
|
|
||||||
if (key->_tes_primitive_mode == TESS_PRIMITIVE_QUADS) {
|
|
||||||
for (int i = 0; i < 4; i++)
|
|
||||||
system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
|
|
||||||
|
|
||||||
system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
|
|
||||||
system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
|
|
||||||
} else if (key->_tes_primitive_mode == TESS_PRIMITIVE_TRIANGLES) {
|
|
||||||
for (int i = 0; i < 3; i++)
|
|
||||||
system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
|
|
||||||
|
|
||||||
system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
|
|
||||||
} else {
|
|
||||||
assert(key->_tes_primitive_mode == TESS_PRIMITIVE_ISOLINES);
|
|
||||||
system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
|
|
||||||
system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Manually setup the TCS binding table. */
|
|
||||||
memset(&bt, 0, sizeof(bt));
|
|
||||||
bt.sizes[CROCUS_SURFACE_GROUP_UBO] = 1;
|
|
||||||
bt.used_mask[CROCUS_SURFACE_GROUP_UBO] = 1;
|
|
||||||
bt.size_bytes = 4;
|
|
||||||
|
|
||||||
prog_data->ubo_ranges[0].length = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
crocus_setup_uniforms(devinfo, mem_ctx, nir, prog_data, &system_values,
|
||||||
|
&num_system_values, &num_cbufs);
|
||||||
|
|
||||||
|
crocus_lower_swizzles(nir, &key->base.tex);
|
||||||
|
crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
|
||||||
|
num_system_values, num_cbufs, &key->base.tex);
|
||||||
|
if (can_push_ubo(devinfo))
|
||||||
|
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
|
||||||
|
|
||||||
struct brw_tcs_prog_key key_clean = *key;
|
struct brw_tcs_prog_key key_clean = *key;
|
||||||
crocus_sanitize_tex_key(&key_clean.base.tex);
|
crocus_sanitize_tex_key(&key_clean.base.tex);
|
||||||
|
|
||||||
|
@@ -482,6 +482,8 @@ iris_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
|
|||||||
unsigned num_system_values = 0;
|
unsigned num_system_values = 0;
|
||||||
|
|
||||||
unsigned patch_vert_idx = -1;
|
unsigned patch_vert_idx = -1;
|
||||||
|
unsigned tess_outer_default_idx = -1;
|
||||||
|
unsigned tess_inner_default_idx = -1;
|
||||||
unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
|
unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
|
||||||
unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
|
unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
|
||||||
unsigned variable_group_size_idx = -1;
|
unsigned variable_group_size_idx = -1;
|
||||||
@@ -581,6 +583,36 @@ iris_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
|
|||||||
offset = nir_imm_int(&b, system_values_start +
|
offset = nir_imm_int(&b, system_values_start +
|
||||||
patch_vert_idx * sizeof(uint32_t));
|
patch_vert_idx * sizeof(uint32_t));
|
||||||
break;
|
break;
|
||||||
|
case nir_intrinsic_load_tess_level_outer_default:
|
||||||
|
if (tess_outer_default_idx == -1) {
|
||||||
|
tess_outer_default_idx = num_system_values;
|
||||||
|
num_system_values += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
system_values[tess_outer_default_idx + i] =
|
||||||
|
BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
|
||||||
|
}
|
||||||
|
|
||||||
|
b.cursor = nir_before_instr(instr);
|
||||||
|
offset = nir_imm_int(&b, system_values_start +
|
||||||
|
tess_outer_default_idx * sizeof(uint32_t));
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_load_tess_level_inner_default:
|
||||||
|
if (tess_inner_default_idx == -1) {
|
||||||
|
tess_inner_default_idx = num_system_values;
|
||||||
|
num_system_values += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
system_values[tess_inner_default_idx + i] =
|
||||||
|
BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X + i;
|
||||||
|
}
|
||||||
|
|
||||||
|
b.cursor = nir_before_instr(instr);
|
||||||
|
offset = nir_imm_int(&b, system_values_start +
|
||||||
|
tess_inner_default_idx * sizeof(uint32_t));
|
||||||
|
break;
|
||||||
case nir_intrinsic_image_deref_load_param_intel: {
|
case nir_intrinsic_image_deref_load_param_intel: {
|
||||||
assert(devinfo->ver < 9);
|
assert(devinfo->ver < 9);
|
||||||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||||
@@ -1530,50 +1562,16 @@ iris_compile_tcs(struct iris_screen *screen,
|
|||||||
|
|
||||||
if (ish) {
|
if (ish) {
|
||||||
nir = nir_shader_clone(mem_ctx, ish->nir);
|
nir = nir_shader_clone(mem_ctx, ish->nir);
|
||||||
|
|
||||||
iris_setup_uniforms(devinfo, mem_ctx, nir, prog_data, 0, &system_values,
|
|
||||||
&num_system_values, &num_cbufs);
|
|
||||||
iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
|
|
||||||
num_system_values, num_cbufs);
|
|
||||||
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
|
|
||||||
} else {
|
} else {
|
||||||
nir =
|
nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, &brw_key);
|
||||||
brw_nir_create_passthrough_tcs(mem_ctx, compiler, &brw_key);
|
|
||||||
|
|
||||||
/* Reserve space for passing the default tess levels as constants. */
|
|
||||||
num_cbufs = 1;
|
|
||||||
num_system_values = 8;
|
|
||||||
system_values =
|
|
||||||
rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
|
|
||||||
prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
|
|
||||||
prog_data->nr_params = num_system_values;
|
|
||||||
|
|
||||||
if (key->_tes_primitive_mode == TESS_PRIMITIVE_QUADS) {
|
|
||||||
for (int i = 0; i < 4; i++)
|
|
||||||
system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
|
|
||||||
|
|
||||||
system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
|
|
||||||
system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
|
|
||||||
} else if (key->_tes_primitive_mode == TESS_PRIMITIVE_TRIANGLES) {
|
|
||||||
for (int i = 0; i < 3; i++)
|
|
||||||
system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
|
|
||||||
|
|
||||||
system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
|
|
||||||
} else {
|
|
||||||
assert(key->_tes_primitive_mode == TESS_PRIMITIVE_ISOLINES);
|
|
||||||
system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
|
|
||||||
system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Manually setup the TCS binding table. */
|
|
||||||
memset(&bt, 0, sizeof(bt));
|
|
||||||
bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
|
|
||||||
bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
|
|
||||||
bt.size_bytes = 4;
|
|
||||||
|
|
||||||
prog_data->ubo_ranges[0].length = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
iris_setup_uniforms(devinfo, mem_ctx, nir, prog_data, 0, &system_values,
|
||||||
|
&num_system_values, &num_cbufs);
|
||||||
|
iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
|
||||||
|
num_system_values, num_cbufs);
|
||||||
|
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
|
||||||
|
|
||||||
struct brw_compile_tcs_params params = {
|
struct brw_compile_tcs_params params = {
|
||||||
.nir = nir,
|
.nir = nir,
|
||||||
.key = &brw_key,
|
.key = &brw_key,
|
||||||
|
@@ -189,9 +189,6 @@ remap_patch_urb_offsets(nir_block *block, nir_builder *b,
|
|||||||
const struct brw_vue_map *vue_map,
|
const struct brw_vue_map *vue_map,
|
||||||
enum tess_primitive_mode tes_primitive_mode)
|
enum tess_primitive_mode tes_primitive_mode)
|
||||||
{
|
{
|
||||||
const bool is_passthrough_tcs = b->shader->info.name &&
|
|
||||||
strcmp(b->shader->info.name, "passthrough TCS") == 0;
|
|
||||||
|
|
||||||
nir_foreach_instr_safe(instr, block) {
|
nir_foreach_instr_safe(instr, block) {
|
||||||
if (instr->type != nir_instr_type_intrinsic)
|
if (instr->type != nir_instr_type_intrinsic)
|
||||||
continue;
|
continue;
|
||||||
@@ -203,8 +200,7 @@ remap_patch_urb_offsets(nir_block *block, nir_builder *b,
|
|||||||
if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
|
if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
|
||||||
(stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {
|
(stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {
|
||||||
|
|
||||||
if (!is_passthrough_tcs &&
|
if (remap_tess_levels(b, intrin, tes_primitive_mode))
|
||||||
remap_tess_levels(b, intrin, tes_primitive_mode))
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
|
int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
|
||||||
@@ -1858,50 +1854,21 @@ brw_nir_create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compile
|
|||||||
{
|
{
|
||||||
const nir_shader_compiler_options *options =
|
const nir_shader_compiler_options *options =
|
||||||
compiler->nir_options[MESA_SHADER_TESS_CTRL];
|
compiler->nir_options[MESA_SHADER_TESS_CTRL];
|
||||||
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_TESS_CTRL,
|
|
||||||
options, "passthrough TCS");
|
|
||||||
ralloc_steal(mem_ctx, b.shader);
|
|
||||||
nir_shader *nir = b.shader;
|
|
||||||
nir_variable *var;
|
|
||||||
nir_ssa_def *load;
|
|
||||||
nir_ssa_def *zero = nir_imm_int(&b, 0);
|
|
||||||
nir_ssa_def *invoc_id = nir_load_invocation_id(&b);
|
|
||||||
|
|
||||||
nir->info.inputs_read = key->outputs_written &
|
uint64_t inputs_read = key->outputs_written &
|
||||||
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
|
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
|
||||||
nir->info.outputs_written = key->outputs_written;
|
|
||||||
nir->info.tess.tcs_vertices_out = key->input_vertices;
|
|
||||||
nir->num_uniforms = 8 * sizeof(uint32_t);
|
|
||||||
|
|
||||||
var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_0");
|
unsigned locations[64];
|
||||||
var->data.location = 0;
|
unsigned num_locations = 0;
|
||||||
var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_1");
|
|
||||||
var->data.location = 1;
|
|
||||||
|
|
||||||
/* Write the patch URB header. */
|
u_foreach_bit64(varying, inputs_read)
|
||||||
for (int i = 0; i <= 1; i++) {
|
locations[num_locations++] = varying;
|
||||||
load = nir_load_uniform(&b, 4, 32, zero, .base = i * 4 * sizeof(uint32_t));
|
|
||||||
|
|
||||||
nir_store_output(&b, load, zero,
|
|
||||||
.base = VARYING_SLOT_TESS_LEVEL_INNER - i,
|
|
||||||
.write_mask = WRITEMASK_XYZW);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy inputs to outputs. */
|
|
||||||
uint64_t varyings = nir->info.inputs_read;
|
|
||||||
|
|
||||||
while (varyings != 0) {
|
|
||||||
const int varying = ffsll(varyings) - 1;
|
|
||||||
|
|
||||||
load = nir_load_per_vertex_input(&b, 4, 32, invoc_id, zero, .base = varying);
|
|
||||||
|
|
||||||
nir_store_per_vertex_output(&b, load, invoc_id, zero,
|
|
||||||
.base = varying,
|
|
||||||
.write_mask = WRITEMASK_XYZW);
|
|
||||||
|
|
||||||
varyings &= ~BITFIELD64_BIT(varying);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
nir_shader *nir =
|
||||||
|
nir_create_passthrough_tcs_impl(options, locations, num_locations,
|
||||||
|
key->input_vertices);
|
||||||
|
nir->info.inputs_read = inputs_read;
|
||||||
|
nir->info.tess._primitive_mode = key->_tes_primitive_mode;
|
||||||
nir_validate_shader(nir, "in brw_nir_create_passthrough_tcs");
|
nir_validate_shader(nir, "in brw_nir_create_passthrough_tcs");
|
||||||
|
|
||||||
struct brw_nir_compiler_opts opts = {};
|
struct brw_nir_compiler_opts opts = {};
|
||||||
|
Reference in New Issue
Block a user