nir: Add an array-of-vector variable shrinking pass
This pass looks for variables with vector or array-of-vector types and
narrows the type to only the components used.

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
@@ -2659,6 +2659,7 @@ void nir_dump_cfg(nir_shader *shader, FILE *fp);

int nir_gs_count_vertices(const nir_shader *shader);

bool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes);
bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);
bool nir_split_var_copies(nir_shader *shader);
bool nir_split_per_member_structs(nir_shader *shader);
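For context, the entry point declared above would typically be invoked from a driver's NIR optimization loop. A minimal, illustrative sketch follows; it is not part of this commit, the wrapper name is hypothetical, and NIR_PASS is the usual Mesa helper macro:

   #include "nir.h"

   /* Hypothetical helper: run the shrinking pass on globals and locals and
    * report whether anything changed.
    */
   static bool
   shrink_shader_temporaries(nir_shader *nir)
   {
      bool progress = false;
      NIR_PASS(progress, nir, nir_shrink_vec_array_vars,
               nir_var_global | nir_var_local);
      return progress;
   }
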
@@ -26,6 +26,9 @@
#include "nir_deref.h"
#include "nir_vla.h"

/* Needed for _mesa_bitcount() */
#include "main/macros.h"

struct split_var_state {
   void *mem_ctx;
@@ -856,3 +859,717 @@ nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)

   return progress;
}

struct array_level_usage {
   unsigned array_len;

   /* The value UINT_MAX will be used to indicate an indirect */
   unsigned max_read;
   unsigned max_written;

   /* True if there is a copy that isn't to/from a shrinkable array */
   bool has_external_copy;
   struct set *levels_copied;
};

struct vec_var_usage {
   /* Convenience set of all components this variable has */
   nir_component_mask_t all_comps;

   nir_component_mask_t comps_read;
   nir_component_mask_t comps_written;

   nir_component_mask_t comps_kept;

   /* True if there is a copy that isn't to/from a shrinkable vector */
   bool has_external_copy;
   struct set *vars_copied;

   unsigned num_levels;
   struct array_level_usage levels[0];
};

static struct vec_var_usage *
get_vec_var_usage(nir_variable *var,
                  struct hash_table *var_usage_map,
                  bool add_usage_entry, void *mem_ctx)
{
   struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
   if (entry)
      return entry->data;

   if (!add_usage_entry)
      return NULL;

   /* Check to make sure that we are working with an array of vectors.  We
    * don't bother to shrink single vectors because we figure that we can
    * clean it up better with SSA than by inserting piles of vecN instructions
    * to compact results.
    */
   int num_levels = num_array_levels_in_array_of_vector_type(var->type);
   if (num_levels < 1)
      return NULL; /* Not an array of vectors */

   struct vec_var_usage *usage =
      rzalloc_size(mem_ctx, sizeof(*usage) +
                   num_levels * sizeof(usage->levels[0]));

   usage->num_levels = num_levels;
   const struct glsl_type *type = var->type;
   for (unsigned i = 0; i < num_levels; i++) {
      usage->levels[i].array_len = glsl_get_length(type);
      type = glsl_get_array_element(type);
   }
   assert(glsl_type_is_vector_or_scalar(type));

   usage->all_comps = (1 << glsl_get_components(type)) - 1;

   _mesa_hash_table_insert(var_usage_map, var, usage);

   return usage;
}

static struct vec_var_usage *
get_vec_deref_usage(nir_deref_instr *deref,
                    struct hash_table *var_usage_map,
                    nir_variable_mode modes,
                    bool add_usage_entry, void *mem_ctx)
{
   if (!(deref->mode & modes))
      return NULL;

   return get_vec_var_usage(nir_deref_instr_get_variable(deref),
                            var_usage_map, add_usage_entry, mem_ctx);
}

static void
mark_deref_used(nir_deref_instr *deref,
                nir_component_mask_t comps_read,
                nir_component_mask_t comps_written,
                nir_deref_instr *copy_deref,
                struct hash_table *var_usage_map,
                nir_variable_mode modes,
                void *mem_ctx)
{
   if (!(deref->mode & modes))
      return;

   nir_variable *var = nir_deref_instr_get_variable(deref);

   struct vec_var_usage *usage =
      get_vec_var_usage(var, var_usage_map, true, mem_ctx);
   if (!usage)
      return;

   usage->comps_read |= comps_read & usage->all_comps;
   usage->comps_written |= comps_written & usage->all_comps;

   struct vec_var_usage *copy_usage = NULL;
   if (copy_deref) {
      copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
                                       true, mem_ctx);
      if (copy_usage) {
         if (usage->vars_copied == NULL) {
            usage->vars_copied = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
                                                  _mesa_key_pointer_equal);
         }
         _mesa_set_add(usage->vars_copied, copy_usage);
      } else {
         usage->has_external_copy = true;
      }
   }

   nir_deref_path path;
   nir_deref_path_init(&path, deref, mem_ctx);

   nir_deref_path copy_path;
   if (copy_usage)
      nir_deref_path_init(&copy_path, copy_deref, mem_ctx);

   unsigned copy_i = 0;
   for (unsigned i = 0; i < usage->num_levels; i++) {
      struct array_level_usage *level = &usage->levels[i];
      nir_deref_instr *deref = path.path[i + 1];
      assert(deref->deref_type == nir_deref_type_array ||
             deref->deref_type == nir_deref_type_array_wildcard);

      unsigned max_used;
      if (deref->deref_type == nir_deref_type_array) {
         nir_const_value *const_index =
            nir_src_as_const_value(deref->arr.index);
         max_used = const_index ? const_index->u32[0] : UINT_MAX;
      } else {
         /* For wildcards, we read or wrote the whole thing. */
         assert(deref->deref_type == nir_deref_type_array_wildcard);
         max_used = level->array_len - 1;

         if (copy_usage) {
            /* Match each wildcard level with the level on copy_usage */
            for (; copy_path.path[copy_i + 1]; copy_i++) {
               if (copy_path.path[copy_i + 1]->deref_type ==
                   nir_deref_type_array_wildcard)
                  break;
            }
            struct array_level_usage *copy_level =
               &copy_usage->levels[copy_i++];

            if (level->levels_copied == NULL) {
               level->levels_copied =
                  _mesa_set_create(mem_ctx, _mesa_hash_pointer,
                                   _mesa_key_pointer_equal);
            }
            _mesa_set_add(level->levels_copied, copy_level);
         } else {
            /* We have a wildcard and it comes from a variable we aren't
             * tracking; flag it and we'll know to not shorten this array.
             */
            level->has_external_copy = true;
         }
      }

      if (comps_written)
         level->max_written = MAX2(level->max_written, max_used);
      if (comps_read)
         level->max_read = MAX2(level->max_read, max_used);
   }
}

static bool
src_is_load_deref(nir_src src, nir_src deref_src)
{
   assert(src.is_ssa);
   assert(deref_src.is_ssa);

   if (src.ssa->parent_instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(src.ssa->parent_instr);
   if (load->intrinsic != nir_intrinsic_load_deref)
      return false;

   assert(load->src[0].is_ssa);

   return load->src[0].ssa == deref_src.ssa;
}

/* Returns all non-self-referential components of a store instruction.  A
 * component is self-referential if it comes from the same component of a load
 * instruction on the same deref.  If the only data in a particular component
 * of a variable came directly from that component then it's undefined.  The
 * only way to get defined data into a component of a variable is for it to
 * get written there by something outside or from a different component.
 *
 * This is a fairly common pattern in shaders that come from either GLSL IR or
 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
 * load-vec-store.
 */
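/* Editorial illustration (not from the original patch): glsl_to_nir lowers a
 * masked write such as "var.y = x" to, roughly,
 *
 *    vec4 tmp = load_deref(var);
 *    store_deref(var, vec4(tmp.x, x, tmp.z, tmp.w), writemask 0xf);
 *
 * Only the .y channel carries new data; .x, .z and .w are self-referential
 * and get discounted by the helper below.
 */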
static nir_component_mask_t
get_non_self_referential_store_comps(nir_intrinsic_instr *store)
{
   nir_component_mask_t comps = nir_intrinsic_write_mask(store);

   assert(store->src[1].is_ssa);
   nir_instr *src_instr = store->src[1].ssa->parent_instr;
   if (src_instr->type != nir_instr_type_alu)
      return comps;

   nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);

   if (src_alu->op == nir_op_imov ||
       src_alu->op == nir_op_fmov) {
      /* If it's just a swizzle of a load from the same deref, discount any
       * channels that don't move in the swizzle.
       */
      if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
         for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
            if (src_alu->src[0].swizzle[i] == i)
               comps &= ~(1u << i);
         }
      }
   } else if (src_alu->op == nir_op_vec2 ||
              src_alu->op == nir_op_vec3 ||
              src_alu->op == nir_op_vec4) {
      /* If it's a vec, discount any channels that are just loads from the
       * same deref put in the same spot.
       */
      for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
         if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
             src_alu->src[i].swizzle[0] == i)
            comps &= ~(1u << i);
      }
   }

   return comps;
}

static void
find_used_components_impl(nir_function_impl *impl,
                          struct hash_table *var_usage_map,
                          nir_variable_mode modes,
                          void *mem_ctx)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
            mark_deref_used(nir_src_as_deref(intrin->src[0]),
                            nir_ssa_def_components_read(&intrin->dest.ssa), 0,
                            NULL, var_usage_map, modes, mem_ctx);
            break;

         case nir_intrinsic_store_deref:
            mark_deref_used(nir_src_as_deref(intrin->src[0]),
                            0, get_non_self_referential_store_comps(intrin),
                            NULL, var_usage_map, modes, mem_ctx);
            break;

         case nir_intrinsic_copy_deref: {
            /* Just mark everything used for copies. */
            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
            nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
            mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
            mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
            break;
         }

         default:
            break;
         }
      }
   }
}

static bool
shrink_vec_var_list(struct exec_list *vars,
                    struct hash_table *var_usage_map)
{
   /* Initialize the components kept field of each variable.  This is the
    * AND of the components written and components read.  If a component is
    * written but never read, it's dead.  If it is read but never written,
    * then all values read are undefined garbage and we may as well not read
    * them.
    *
    * The same logic applies to the array length.  We make the array length
    * the minimum required length between read and write and plan to discard
    * any OOB access.  The one exception here is indirect writes because we
    * don't know where they will land and we can't shrink an array with
    * indirect writes because previously in-bounds writes may become
    * out-of-bounds and have undefined behavior.
    *
    * Also, if we have a copy to/from something we can't shrink, we need to
    * leave components and array_len of any wildcards alone.
    */
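   /* Worked example (editorial, not from the original patch): a vec4 that is
    * only ever read through .xy and only ever written through .xyz ends up
    * with comps_kept = 0x3; the .z writes are dead and .w never holds defined
    * data.  Likewise, an array declared with 8 elements whose highest
    * constant-indexed read and write both touch element 2 gets array_len = 3.
    */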
   nir_foreach_variable(var, vars) {
      struct vec_var_usage *usage =
         get_vec_var_usage(var, var_usage_map, false, NULL);
      if (!usage)
         continue;

      assert(usage->comps_kept == 0);
      if (usage->has_external_copy)
         usage->comps_kept = usage->all_comps;
      else
         usage->comps_kept = usage->comps_read & usage->comps_written;

      for (unsigned i = 0; i < usage->num_levels; i++) {
         struct array_level_usage *level = &usage->levels[i];
         assert(level->array_len > 0);

         if (level->max_written == UINT_MAX || level->has_external_copy)
            continue; /* Can't shrink */

         unsigned max_used = MIN2(level->max_read, level->max_written);
         level->array_len = MIN2(max_used, level->array_len - 1) + 1;
      }
   }

   /* In order for variable copies to work, we have to have the same data type
    * on the source and the destination.  In order to satisfy this, we run a
    * little fixed-point algorithm to transitively ensure that we get enough
    * components and array elements for this to hold for all copies.
    */
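   /* Editorial example (not from the original patch): if variable A keeps .xy,
    * variable B keeps .yz, and the shader contains a copy between A and B, the
    * loop below widens both to .xyz so the copy still sees matching types.
    */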
   bool fp_progress;
   do {
      fp_progress = false;
      nir_foreach_variable(var, vars) {
         struct vec_var_usage *var_usage =
            get_vec_var_usage(var, var_usage_map, false, NULL);
         if (!var_usage || !var_usage->vars_copied)
            continue;

         struct set_entry *copy_entry;
         set_foreach(var_usage->vars_copied, copy_entry) {
            struct vec_var_usage *copy_usage = (void *)copy_entry->key;
            if (copy_usage->comps_kept != var_usage->comps_kept) {
               nir_component_mask_t comps_kept =
                  (var_usage->comps_kept | copy_usage->comps_kept);
               var_usage->comps_kept = comps_kept;
               copy_usage->comps_kept = comps_kept;
               fp_progress = true;
            }
         }

         for (unsigned i = 0; i < var_usage->num_levels; i++) {
            struct array_level_usage *var_level = &var_usage->levels[i];
            if (!var_level->levels_copied)
               continue;

            set_foreach(var_level->levels_copied, copy_entry) {
               struct array_level_usage *copy_level = (void *)copy_entry->key;
               if (var_level->array_len != copy_level->array_len) {
                  unsigned array_len =
                     MAX2(var_level->array_len, copy_level->array_len);
                  var_level->array_len = array_len;
                  copy_level->array_len = array_len;
                  fp_progress = true;
               }
            }
         }
      }
   } while (fp_progress);

   bool vars_shrunk = false;
   nir_foreach_variable_safe(var, vars) {
      struct vec_var_usage *usage =
         get_vec_var_usage(var, var_usage_map, false, NULL);
      if (!usage)
         continue;

      bool shrunk = false;
      const struct glsl_type *vec_type = var->type;
      for (unsigned i = 0; i < usage->num_levels; i++) {
         /* If we've reduced the array to zero elements at some level, just
          * set comps_kept to 0 and delete the variable.
          */
         if (usage->levels[i].array_len == 0) {
            usage->comps_kept = 0;
            break;
         }

         assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
         if (usage->levels[i].array_len < glsl_get_length(vec_type))
            shrunk = true;
         vec_type = glsl_get_array_element(vec_type);
      }
      assert(glsl_type_is_vector_or_scalar(vec_type));

      assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
      if (usage->comps_kept != usage->all_comps)
         shrunk = true;

      if (usage->comps_kept == 0) {
         /* This variable is dead, remove it */
         vars_shrunk = true;
         exec_node_remove(&var->node);
         continue;
      }

      if (!shrunk) {
         /* This variable doesn't need to be shrunk.  Remove it from the
          * hash table so later steps will ignore it.
          */
         _mesa_hash_table_remove_key(var_usage_map, var);
         continue;
      }

      /* Build the new var type */
      unsigned new_num_comps = _mesa_bitcount(usage->comps_kept);
      const struct glsl_type *new_type =
         glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
      for (int i = usage->num_levels - 1; i >= 0; i--) {
         assert(usage->levels[i].array_len > 0);
         /* If the original type was a matrix type, we'd like to keep that so
          * we don't convert matrices into arrays.
          */
         if (i == usage->num_levels - 1 &&
             glsl_type_is_matrix(glsl_without_array(var->type)) &&
             new_num_comps > 1 && usage->levels[i].array_len > 1) {
            new_type = glsl_matrix_type(glsl_get_base_type(new_type),
                                        new_num_comps,
                                        usage->levels[i].array_len);
         } else {
            new_type = glsl_array_type(new_type, usage->levels[i].array_len);
         }
      }
      var->type = new_type;

      vars_shrunk = true;
   }

   return vars_shrunk;
}

static bool
vec_deref_is_oob(nir_deref_instr *deref,
                 struct vec_var_usage *usage)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   bool oob = false;
   for (unsigned i = 0; i < usage->num_levels; i++) {
      nir_deref_instr *p = path.path[i + 1];
      if (p->deref_type == nir_deref_type_array_wildcard)
         continue;

      nir_const_value *const_index = nir_src_as_const_value(p->arr.index);
      if (const_index && const_index->u32[0] >= usage->levels[i].array_len) {
         oob = true;
         break;
      }
   }

   nir_deref_path_finish(&path);

   return oob;
}

static bool
vec_deref_is_dead_or_oob(nir_deref_instr *deref,
                         struct hash_table *var_usage_map,
                         nir_variable_mode modes)
{
   struct vec_var_usage *usage =
      get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
   if (!usage)
      return false;

   return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
}

static void
shrink_vec_var_access_impl(nir_function_impl *impl,
                           struct hash_table *var_usage_map,
                           nir_variable_mode modes)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (!(deref->mode & modes))
               break;

            /* Clean up any dead derefs we find lying around.  They may refer
             * to variables we've deleted.
             */
            if (nir_deref_instr_remove_if_unused(deref))
               break;

            /* Update the type in the deref to keep the types consistent as
             * you walk down the chain.  We don't need to check if this is one
             * of the derefs we're shrinking because this is a no-op if it
             * isn't.  The worst that could happen is that we accidentally fix
             * an invalid deref.
             */
            if (deref->deref_type == nir_deref_type_var) {
               deref->type = deref->var->type;
            } else if (deref->deref_type == nir_deref_type_array ||
                       deref->deref_type == nir_deref_type_array_wildcard) {
               nir_deref_instr *parent = nir_deref_instr_parent(deref);
               assert(glsl_type_is_array(parent->type) ||
                      glsl_type_is_matrix(parent->type));
               deref->type = glsl_get_array_element(parent->type);
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            /* If we have a copy whose source or destination has been deleted
             * because we determined the variable was dead, then we just
             * delete the copy instruction.  If the source variable was dead
             * then it was writing undefined garbage anyway and if it's the
             * destination variable that's dead then the write isn't needed.
             */
            if (intrin->intrinsic == nir_intrinsic_copy_deref) {
               nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
               nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
               if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
                   vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
                  nir_instr_remove(&intrin->instr);
                  nir_deref_instr_remove_if_unused(dst);
                  nir_deref_instr_remove_if_unused(src);
               }
               continue;
            }

            if (intrin->intrinsic != nir_intrinsic_load_deref &&
                intrin->intrinsic != nir_intrinsic_store_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!(deref->mode & modes))
               continue;

            struct vec_var_usage *usage =
               get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
            if (!usage)
               continue;

            if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
               if (intrin->intrinsic == nir_intrinsic_load_deref) {
                  nir_ssa_def *u =
                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
                                   intrin->dest.ssa.bit_size);
                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                           nir_src_for_ssa(u));
               }
               nir_instr_remove(&intrin->instr);
               nir_deref_instr_remove_if_unused(deref);
               continue;
            }

            if (intrin->intrinsic == nir_intrinsic_load_deref) {
               b.cursor = nir_after_instr(&intrin->instr);

               nir_ssa_def *undef =
                  nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
               nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
               unsigned c = 0;
               for (unsigned i = 0; i < intrin->num_components; i++) {
                  if (usage->comps_kept & (1u << i))
                     vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
                  else
                     vec_srcs[i] = undef;
               }
               nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);

               nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
                                              nir_src_for_ssa(vec),
                                              vec->parent_instr);

               /* The SSA def is now only used by the swizzle.  It's safe to
                * shrink the number of components.
                */
               assert(list_length(&intrin->dest.ssa.uses) == c);
               intrin->num_components = c;
               intrin->dest.ssa.num_components = c;
            } else {
               nir_component_mask_t write_mask =
                  nir_intrinsic_write_mask(intrin);

               unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
               nir_component_mask_t new_write_mask = 0;
               unsigned c = 0;
               for (unsigned i = 0; i < intrin->num_components; i++) {
                  if (usage->comps_kept & (1u << i)) {
                     swizzle[c] = i;
                     if (write_mask & (1u << i))
                        new_write_mask |= 1u << c;
                     c++;
                  }
               }

               b.cursor = nir_before_instr(&intrin->instr);

               nir_ssa_def *swizzled =
                  nir_swizzle(&b, intrin->src[1].ssa, swizzle, c, false);

               /* Rewrite to use the compacted source */
               nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
                                     nir_src_for_ssa(swizzled));
               nir_intrinsic_set_write_mask(intrin, new_write_mask);
               intrin->num_components = c;
            }
            break;
         }

         default:
            break;
         }
      }
   }
}

static bool
function_impl_has_vars_with_modes(nir_function_impl *impl,
                                  nir_variable_mode modes)
{
   nir_shader *shader = impl->function->shader;

   if ((modes & nir_var_global) && !exec_list_is_empty(&shader->globals))
      return true;

   if ((modes & nir_var_local) && !exec_list_is_empty(&impl->locals))
      return true;

   return false;
}
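/* Editorial sketch of the overall effect (not part of the original patch):
 * given a local declared as
 *
 *    vec4 data[8];
 *
 * where only data[0..2].xy ever both receives defined values and is read
 * back, the pass rewrites the variable to the equivalent of
 *
 *    vec2 data[3];
 *
 * and then fixes up every load, store, copy, and write mask to match.
 */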
/** Attempt to shrink arrays of vectors
 *
 * This pass looks at variables which contain a vector or an array (possibly
 * multiple dimensions) of vectors and attempts to lower them to a smaller
 * vector or array.  If the pass can prove that a component of a vector (or
 * array of vectors) is never really used, then that component will be
 * removed.  Similarly, the pass attempts to shorten arrays based on what
 * elements it can prove are never read or never contain valid data.
 */
bool
nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
{
   assert((modes & (nir_var_global | nir_var_local)) == modes);

   void *mem_ctx = ralloc_context(NULL);

   struct hash_table *var_usage_map =
      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
                              _mesa_key_pointer_equal);

   bool has_vars_to_shrink = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      /* Don't even bother crawling the IR if we don't have any variables.
       * Given that this pass deletes any unused variables, it's likely that
       * we will be in this scenario eventually.
       */
      if (function_impl_has_vars_with_modes(function->impl, modes)) {
         has_vars_to_shrink = true;
         find_used_components_impl(function->impl, var_usage_map,
                                   modes, mem_ctx);
      }
   }
   if (!has_vars_to_shrink) {
      ralloc_free(mem_ctx);
      return false;
   }

   bool globals_shrunk = false;
   if (modes & nir_var_global)
      globals_shrunk = shrink_vec_var_list(&shader->globals, var_usage_map);

   bool progress = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool locals_shrunk = false;
      if (modes & nir_var_local) {
         locals_shrunk = shrink_vec_var_list(&function->impl->locals,
                                             var_usage_map);
      }

      if (globals_shrunk || locals_shrunk) {
         shrink_vec_var_access_impl(function->impl, var_usage_map, modes);

         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
         progress = true;
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}