nir/gather_info: implement partial masking of struct and compact I/O

fossil-db (Sienna):
Totals from 138 (0.10% of 138791) affected shaders:
CodeSize: 504060 -> 482136 (-4.35%)
Instrs: 97318 -> 94518 (-2.88%)
Cycles: 389272 -> 378072 (-2.88%)
VMEM: 14397 -> 14614 (+1.51%); split: +1.76%, -0.25%
SMEM: 9088 -> 9024 (-0.70%)
VClause: 2915 -> 2430 (-16.64%)
SClause: 1790 -> 1791 (+0.06%)
PreVGPRs: 5013 -> 4998 (-0.30%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8364>
This commit is contained in:
Rhys Perry
2021-01-06 15:24:26 +00:00
committed by Marge Bot
parent 8731a1beb7
commit cc7a187411

View File

@@ -194,7 +194,7 @@ mark_whole_variable(nir_shader *shader, nir_variable *var,
}
static unsigned
get_io_offset(nir_deref_instr *deref, bool is_vertex_input, bool per_vertex)
get_io_offset(nir_deref_instr *deref, bool compact, bool per_vertex)
{
unsigned offset = 0;
@@ -206,10 +206,18 @@ get_io_offset(nir_deref_instr *deref, bool is_vertex_input, bool per_vertex)
if (!nir_src_is_const(d->arr.index))
return -1;
offset += glsl_count_attribute_slots(d->type, is_vertex_input) *
nir_src_as_uint(d->arr.index);
if (compact)
offset += nir_src_as_uint(d->arr.index) / 4;
else
offset += glsl_count_attribute_slots(d->type, false) *
nir_src_as_uint(d->arr.index);
} else if (d->deref_type == nir_deref_type_struct) {
const struct glsl_type *parent_type = nir_deref_instr_parent(d)->type;
for (unsigned i = 0; i < d->strct.index; i++) {
const struct glsl_type *field_type = glsl_get_struct_field(parent_type, i);
offset += glsl_count_attribute_slots(field_type, false);
}
}
/* TODO: we can get the offset for structs here see nir_lower_io() */
}
return offset;
@@ -238,45 +246,15 @@ try_mask_partial_io(nir_shader *shader, nir_variable *var,
if (var->data.per_view)
return false;
/* The code below only handles:
*
* - Indexing into matrices
* - Indexing into arrays of (arrays, matrices, vectors, or scalars)
*
* For now, we just give up if we see varying structs and arrays of structs
* here marking the entire variable as used.
*/
if (!(glsl_type_is_matrix(type) ||
(glsl_type_is_array(type) && !var->data.compact &&
(glsl_type_is_numeric(glsl_without_array(type)) ||
glsl_type_is_boolean(glsl_without_array(type)))))) {
/* If we don't know how to handle this case, give up and let the
* caller mark the whole variable as used.
*/
return false;
}
unsigned offset = get_io_offset(deref, false, per_vertex);
unsigned offset = get_io_offset(deref, var->data.compact, per_vertex);
if (offset == -1)
return false;
unsigned num_elems;
unsigned elem_width = 1;
unsigned mat_cols = 1;
if (glsl_type_is_array(type)) {
num_elems = glsl_get_aoa_size(type);
if (glsl_type_is_matrix(glsl_without_array(type)))
mat_cols = glsl_get_matrix_columns(glsl_without_array(type));
} else {
num_elems = glsl_get_matrix_columns(type);
}
const unsigned slots =
var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
: glsl_count_attribute_slots(type, false);
/* double element width for double types that takes two slots */
if (glsl_type_is_dual_slot(glsl_without_array(type)))
elem_width *= 2;
if (offset >= num_elems * elem_width * mat_cols) {
if (offset >= slots) {
/* Constant index outside the bounds of the matrix/array. This could
* arise as a result of constant folding of a legal GLSL program.
*
@@ -289,7 +267,8 @@ try_mask_partial_io(nir_shader *shader, nir_variable *var,
return false;
}
set_io_mask(shader, var, offset, elem_width, deref, is_output_read);
unsigned len = glsl_count_attribute_slots(deref->type, false);
set_io_mask(shader, var, offset, len, deref, is_output_read);
return true;
}