nir/gather_info: implement partial masking of struct and compact I/O
fossil-db (Sienna): Totals from 138 (0.10% of 138791) affected shaders: CodeSize: 504060 -> 482136 (-4.35%) Instrs: 97318 -> 94518 (-2.88%) Cycles: 389272 -> 378072 (-2.88%) VMEM: 14397 -> 14614 (+1.51%); split: +1.76%, -0.25% SMEM: 9088 -> 9024 (-0.70%) VClause: 2915 -> 2430 (-16.64%) SClause: 1790 -> 1791 (+0.06%) PreVGPRs: 5013 -> 4998 (-0.30%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8364>
This commit is contained in:
@@ -194,7 +194,7 @@ mark_whole_variable(nir_shader *shader, nir_variable *var,
|
||||
}
|
||||
|
||||
static unsigned
|
||||
get_io_offset(nir_deref_instr *deref, bool is_vertex_input, bool per_vertex)
|
||||
get_io_offset(nir_deref_instr *deref, bool compact, bool per_vertex)
|
||||
{
|
||||
unsigned offset = 0;
|
||||
|
||||
@@ -206,10 +206,18 @@ get_io_offset(nir_deref_instr *deref, bool is_vertex_input, bool per_vertex)
|
||||
if (!nir_src_is_const(d->arr.index))
|
||||
return -1;
|
||||
|
||||
offset += glsl_count_attribute_slots(d->type, is_vertex_input) *
|
||||
nir_src_as_uint(d->arr.index);
|
||||
if (compact)
|
||||
offset += nir_src_as_uint(d->arr.index) / 4;
|
||||
else
|
||||
offset += glsl_count_attribute_slots(d->type, false) *
|
||||
nir_src_as_uint(d->arr.index);
|
||||
} else if (d->deref_type == nir_deref_type_struct) {
|
||||
const struct glsl_type *parent_type = nir_deref_instr_parent(d)->type;
|
||||
for (unsigned i = 0; i < d->strct.index; i++) {
|
||||
const struct glsl_type *field_type = glsl_get_struct_field(parent_type, i);
|
||||
offset += glsl_count_attribute_slots(field_type, false);
|
||||
}
|
||||
}
|
||||
/* TODO: we can get the offset for structs here see nir_lower_io() */
|
||||
}
|
||||
|
||||
return offset;
|
||||
@@ -238,45 +246,15 @@ try_mask_partial_io(nir_shader *shader, nir_variable *var,
|
||||
if (var->data.per_view)
|
||||
return false;
|
||||
|
||||
/* The code below only handles:
|
||||
*
|
||||
* - Indexing into matrices
|
||||
* - Indexing into arrays of (arrays, matrices, vectors, or scalars)
|
||||
*
|
||||
* For now, we just give up if we see varying structs and arrays of structs
|
||||
* here marking the entire variable as used.
|
||||
*/
|
||||
if (!(glsl_type_is_matrix(type) ||
|
||||
(glsl_type_is_array(type) && !var->data.compact &&
|
||||
(glsl_type_is_numeric(glsl_without_array(type)) ||
|
||||
glsl_type_is_boolean(glsl_without_array(type)))))) {
|
||||
|
||||
/* If we don't know how to handle this case, give up and let the
|
||||
* caller mark the whole variable as used.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned offset = get_io_offset(deref, false, per_vertex);
|
||||
unsigned offset = get_io_offset(deref, var->data.compact, per_vertex);
|
||||
if (offset == -1)
|
||||
return false;
|
||||
|
||||
unsigned num_elems;
|
||||
unsigned elem_width = 1;
|
||||
unsigned mat_cols = 1;
|
||||
if (glsl_type_is_array(type)) {
|
||||
num_elems = glsl_get_aoa_size(type);
|
||||
if (glsl_type_is_matrix(glsl_without_array(type)))
|
||||
mat_cols = glsl_get_matrix_columns(glsl_without_array(type));
|
||||
} else {
|
||||
num_elems = glsl_get_matrix_columns(type);
|
||||
}
|
||||
const unsigned slots =
|
||||
var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
|
||||
: glsl_count_attribute_slots(type, false);
|
||||
|
||||
/* double element width for double types that takes two slots */
|
||||
if (glsl_type_is_dual_slot(glsl_without_array(type)))
|
||||
elem_width *= 2;
|
||||
|
||||
if (offset >= num_elems * elem_width * mat_cols) {
|
||||
if (offset >= slots) {
|
||||
/* Constant index outside the bounds of the matrix/array. This could
|
||||
* arise as a result of constant folding of a legal GLSL program.
|
||||
*
|
||||
@@ -289,7 +267,8 @@ try_mask_partial_io(nir_shader *shader, nir_variable *var,
|
||||
return false;
|
||||
}
|
||||
|
||||
set_io_mask(shader, var, offset, elem_width, deref, is_output_read);
|
||||
unsigned len = glsl_count_attribute_slots(deref->type, false);
|
||||
set_io_mask(shader, var, offset, len, deref, is_output_read);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user