third_party_mesa3d/src/compiler/nir/nir_deref.c
Alyssa Rosenzweig 7f6491b76d nir: Combine if_uses with instruction uses
Every nir_ssa_def heads two chains of uses (regular instruction uses and
if-uses), each implemented with a doubly linked list. Each list requires
2 * 64-bit = 16 bytes per def, which is memory intensive. Together they
require 32 bytes per def. Not cool.

To cut that memory use in half, we can combine the two linked lists into a
single use list that contains both regular instruction uses and if-uses. To do
this, we augment nir_src with a boolean "is_if" and reimplement the
abstract if-uses operations on top of that list. That boolean should fit into
the padding already in nir_src, so it should not actually affect memory use;
in the future we can sneak it into the bottom bit of a pointer.

However, this creates a new inefficiency: now iterating over regular uses
separate from if-uses is (nominally) more expensive. It turns out virtually
every caller of nir_foreach_if_use(_safe) also calls nir_foreach_use(_safe)
immediately before, so we rewrite most of the callers to instead call a new
single `nir_foreach_use_including_if(_safe)` which predicates the logic based on
`src->is_if`. This should mitigate the performance difference.

There's a bit of churn, but this is largely a mechanical set of changes.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22343>
2023-04-07 23:48:03 +00:00
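
To make the rewritten iteration pattern concrete, here is a minimal sketch
(not part of the patch itself) of what a converted caller can look like,
assuming only the combined iterator and the "is_if" flag described above;
visit_if_use() and visit_instr_use() are hypothetical callbacks standing in
for whatever the caller does with each kind of use:

/* Hypothetical caller: one walk over the combined use list instead of
 * nir_foreach_use followed by nir_foreach_if_use.
 */
static void
visit_all_uses(nir_ssa_def *def)
{
   nir_foreach_use_including_if(src, def) {
      if (src->is_if)
         visit_if_use(src->parent_if);       /* use as an if-condition */
      else
         visit_instr_use(src->parent_instr); /* regular instruction use */
   }
}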


/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "util/hash_table.h"
static bool
is_trivial_deref_cast(nir_deref_instr *cast)
{
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (!parent)
return false;
return cast->modes == parent->modes &&
cast->type == parent->type &&
cast->dest.ssa.num_components == parent->dest.ssa.num_components &&
cast->dest.ssa.bit_size == parent->dest.ssa.bit_size;
}
void
nir_deref_path_init(nir_deref_path *path,
nir_deref_instr *deref, void *mem_ctx)
{
assert(deref != NULL);
/* The length of the short path is at most ARRAY_SIZE - 1 because we need
* room for the NULL terminator.
*/
static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;
int count = 0;
nir_deref_instr **tail = &path->_short_path[max_short_path_len];
nir_deref_instr **head = tail;
*tail = NULL;
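/* First pass: walk from the deref back to its root, counting the chain
* (trivial casts are skipped) and filling the short path tail-first while
* it still fits.
*/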
for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
continue;
count++;
if (count <= max_short_path_len)
*(--head) = d;
}
if (count <= max_short_path_len) {
/* If we're under max_short_path_len, just use the short path. */
path->path = head;
goto done;
}
#ifndef NDEBUG
/* Just in case someone uses short_path by accident */
for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++)
path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef;
#endif
path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
head = tail = path->path + count;
*tail = NULL;
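/* Second pass: the chain did not fit in _short_path, so repeat the same
* walk into the heap-allocated array.
*/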
for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
continue;
*(--head) = d;
}
done:
assert(head == path->path);
assert(tail == head + count);
assert(*tail == NULL);
}
void
nir_deref_path_finish(nir_deref_path *path)
{
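/* path points into _short_path for short chains; only heap-allocated
* paths need freeing.
*/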
if (path->path < &path->_short_path[0] ||
path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
ralloc_free(path->path);
}
/**
* Recursively removes unused deref instructions
*/
bool
nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
{
bool progress = false;
for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
/* If anyone is using this deref, leave it alone */
assert(d->dest.is_ssa);
if (!nir_ssa_def_is_unused(&d->dest.ssa))
break;
nir_instr_remove(&d->instr);
progress = true;
}
return progress;
}
bool
nir_deref_instr_has_indirect(nir_deref_instr *instr)
{
while (instr->deref_type != nir_deref_type_var) {
/* Consider casts to be indirects */
if (instr->deref_type == nir_deref_type_cast)
return true;
if ((instr->deref_type == nir_deref_type_array ||
instr->deref_type == nir_deref_type_ptr_as_array) &&
!nir_src_is_const(instr->arr.index))
return true;
instr = nir_deref_instr_parent(instr);
}
return false;
}
bool
nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
{
for (; instr; instr = nir_deref_instr_parent(instr)) {
if (instr->deref_type == nir_deref_type_array &&
nir_src_is_const(instr->arr.index) &&
nir_src_as_uint(instr->arr.index) >=
glsl_get_length(nir_deref_instr_parent(instr)->type))
return true;
}
return false;
}
bool
nir_deref_instr_has_complex_use(nir_deref_instr *deref,
nir_deref_instr_has_complex_use_options opts)
{
nir_foreach_use_including_if(use_src, &deref->dest.ssa) {
if (use_src->is_if)
return true;
nir_instr *use_instr = use_src->parent_instr;
switch (use_instr->type) {
case nir_instr_type_deref: {
nir_deref_instr *use_deref = nir_instr_as_deref(use_instr);
/* A var deref has no sources */
assert(use_deref->deref_type != nir_deref_type_var);
/* If a deref shows up in an array index or something like that, it's
* a complex use.
*/
if (use_src != &use_deref->parent)
return true;
/* Anything that isn't a basic struct or array deref is considered to
* be a "complex" use. In particular, we don't allow ptr_as_array
* because we assume that opt_deref will turn any non-complex
* ptr_as_array derefs into regular array derefs eventually so passes
* which only want to handle simple derefs will pick them up in a
* later pass.
*/
if (use_deref->deref_type != nir_deref_type_struct &&
use_deref->deref_type != nir_deref_type_array_wildcard &&
use_deref->deref_type != nir_deref_type_array)
return true;
if (nir_deref_instr_has_complex_use(use_deref, opts))
return true;
continue;
}
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
switch (use_intrin->intrinsic) {
case nir_intrinsic_load_deref:
assert(use_src == &use_intrin->src[0]);
continue;
case nir_intrinsic_copy_deref:
assert(use_src == &use_intrin->src[0] ||
use_src == &use_intrin->src[1]);
continue;
case nir_intrinsic_store_deref:
/* A use in src[1] of a store means we're taking that pointer and
* writing it to a variable. Because we have no idea who will
* read that variable and what they will do with the pointer, it's
* considered a "complex" use. A use in src[0], on the other
* hand, is a simple use because we're just going to dereference
* it and write a value there.
*/
if (use_src == &use_intrin->src[0])
continue;
return true;
case nir_intrinsic_memcpy_deref:
if (use_src == &use_intrin->src[0] &&
(opts & nir_deref_instr_has_complex_use_allow_memcpy_dst))
continue;
if (use_src == &use_intrin->src[1] &&
(opts & nir_deref_instr_has_complex_use_allow_memcpy_src))
continue;
return true;
default:
return true;
}
unreachable("Switch default failed");
}
default:
return true;
}
}
return false;
}
static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
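/* Booleans are stored as 32-bit values in memory, regardless of their
* 1-bit SSA representation.
*/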
assert(glsl_type_is_vector_or_scalar(type) ||
glsl_type_is_matrix(type));
return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}
unsigned
nir_deref_instr_array_stride(nir_deref_instr *deref)
{
switch (deref->deref_type) {
case nir_deref_type_array:
case nir_deref_type_array_wildcard: {
const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type;
unsigned stride = glsl_get_explicit_stride(arr_type);
if ((glsl_type_is_matrix(arr_type) &&
glsl_matrix_type_is_row_major(arr_type)) ||
(glsl_type_is_vector(arr_type) && stride == 0))
stride = type_scalar_size_bytes(arr_type);
return stride;
}
case nir_deref_type_ptr_as_array:
return nir_deref_instr_array_stride(nir_deref_instr_parent(deref));
case nir_deref_type_cast:
return deref->cast.ptr_stride;
default:
return 0;
}
}
static unsigned
type_get_array_stride(const struct glsl_type *elem_type,
glsl_type_size_align_func size_align)
{
unsigned elem_size, elem_align;
size_align(elem_type, &elem_size, &elem_align);
return ALIGN_POT(elem_size, elem_align);
}
static unsigned
struct_type_get_field_offset(const struct glsl_type *struct_type,
glsl_type_size_align_func size_align,
unsigned field_idx)
{
assert(glsl_type_is_struct_or_ifc(struct_type));
unsigned offset = 0;
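/* Walk the fields up to and including field_idx, aligning at each step;
* the sizes of the preceding fields accumulate into the final offset.
*/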
for (unsigned i = 0; i <= field_idx; i++) {
unsigned elem_size, elem_align;
size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align);
offset = ALIGN_POT(offset, elem_align);
if (i < field_idx)
offset += elem_size;
}
return offset;
}
unsigned
nir_deref_instr_get_const_offset(nir_deref_instr *deref,
glsl_type_size_align_func size_align)
{
nir_deref_path path;
nir_deref_path_init(&path, deref, NULL);
unsigned offset = 0;
for (nir_deref_instr **p = &path.path[1]; *p; p++) {
switch ((*p)->deref_type) {
case nir_deref_type_array:
offset += nir_src_as_uint((*p)->arr.index) *
type_get_array_stride((*p)->type, size_align);
break;
case nir_deref_type_struct: {
/* p starts at path[1], so this is safe */
nir_deref_instr *parent = *(p - 1);
offset += struct_type_get_field_offset(parent->type, size_align,
(*p)->strct.index);
break;
}
case nir_deref_type_cast:
/* A cast doesn't contribute to the offset */
break;
default:
unreachable("Unsupported deref type");
}
}
nir_deref_path_finish(&path);
return offset;
}
nir_ssa_def *
nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
glsl_type_size_align_func size_align)
{
nir_deref_path path;
nir_deref_path_init(&path, deref, NULL);
nir_ssa_def *offset = nir_imm_intN_t(b, 0, deref->dest.ssa.bit_size);
for (nir_deref_instr **p = &path.path[1]; *p; p++) {
switch ((*p)->deref_type) {
case nir_deref_type_array:
case nir_deref_type_ptr_as_array: {
nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
int stride = type_get_array_stride((*p)->type, size_align);
offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
break;
}
case nir_deref_type_struct: {
/* p starts at path[1], so this is safe */
nir_deref_instr *parent = *(p - 1);
unsigned field_offset =
struct_type_get_field_offset(parent->type, size_align,
(*p)->strct.index);
offset = nir_iadd_imm(b, offset, field_offset);
break;
}
case nir_deref_type_cast:
/* A cast doesn't contribute to the offset */
break;
default:
unreachable("Unsupported deref type");
}
}
nir_deref_path_finish(&path);
return offset;
}
bool
nir_remove_dead_derefs_impl(nir_function_impl *impl)
{
bool progress = false;
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type == nir_instr_type_deref &&
nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
progress = true;
}
}
if (progress) {
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
} else {
nir_metadata_preserve(impl, nir_metadata_all);
}
return progress;
}
bool
nir_remove_dead_derefs(nir_shader *shader)
{
bool progress = false;
nir_foreach_function(function, shader) {
if (function->impl && nir_remove_dead_derefs_impl(function->impl))
progress = true;
}
return progress;
}
void
nir_fixup_deref_modes(nir_shader *shader)
{
nir_foreach_function(function, shader) {
if (!function->impl)
continue;
nir_foreach_block(block, function->impl) {
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_deref)
continue;
nir_deref_instr *deref = nir_instr_as_deref(instr);
if (deref->deref_type == nir_deref_type_cast)
continue;
nir_variable_mode parent_modes;
if (deref->deref_type == nir_deref_type_var) {
parent_modes = deref->var->data.mode;
} else {
assert(deref->parent.is_ssa);
nir_deref_instr *parent =
nir_instr_as_deref(deref->parent.ssa->parent_instr);
parent_modes = parent->modes;
}
deref->modes = parent_modes;
}
}
}
}
static bool
modes_may_alias(nir_variable_mode a, nir_variable_mode b)
{
/* Generic pointers can alias with SSBOs */
if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) &&
(b & (nir_var_mem_ssbo | nir_var_mem_global)))
return true;
/* Pointers can only alias if they share a mode. */
return a & b;
}
ALWAYS_INLINE static nir_deref_compare_result
compare_deref_paths(nir_deref_path *a_path, nir_deref_path *b_path,
unsigned *i, bool (*stop_fn)(const nir_deref_instr *))
{
/* Start off assuming they fully compare. We ignore equality for now. In
* the end, we'll determine that by containment.
*/
nir_deref_compare_result result = nir_derefs_may_alias_bit |
nir_derefs_a_contains_b_bit |
nir_derefs_b_contains_a_bit;
nir_deref_instr **a = a_path->path;
nir_deref_instr **b = b_path->path;
for (; a[*i] != NULL; (*i)++) {
if (a[*i] != b[*i])
break;
if (stop_fn && stop_fn(a[*i]))
break;
}
/* We're at either the tail or the divergence point between the two deref
* paths. Look to see if either contains cast or a ptr_as_array deref. If
* it does we don't know how to safely make any inferences. Hopefully,
* nir_opt_deref will clean most of these up and we can start inferring
* things again.
*
* In theory, we could do a bit better. For instance, we could detect the
* case where we have exactly one ptr_as_array deref in the chain after the
* divergence point and it's matched in both chains and the two chains have
* different constant indices.
*/
for (unsigned j = *i; a[j] != NULL; j++) {
if (stop_fn && stop_fn(a[j]))
break;
if (a[j]->deref_type == nir_deref_type_cast ||
a[j]->deref_type == nir_deref_type_ptr_as_array)
return nir_derefs_may_alias_bit;
}
for (unsigned j = *i; b[j] != NULL; j++) {
if (stop_fn && stop_fn(b[j]))
break;
if (b[j]->deref_type == nir_deref_type_cast ||
b[j]->deref_type == nir_deref_type_ptr_as_array)
return nir_derefs_may_alias_bit;
}
for (; a[*i] != NULL && b[*i] != NULL; (*i)++) {
if (stop_fn && (stop_fn(a[*i]) || stop_fn(b[*i])))
break;
switch (a[*i]->deref_type) {
case nir_deref_type_array:
case nir_deref_type_array_wildcard: {
assert(b[*i]->deref_type == nir_deref_type_array ||
b[*i]->deref_type == nir_deref_type_array_wildcard);
if (a[*i]->deref_type == nir_deref_type_array_wildcard) {
if (b[*i]->deref_type != nir_deref_type_array_wildcard)
result &= ~nir_derefs_b_contains_a_bit;
} else if (b[*i]->deref_type == nir_deref_type_array_wildcard) {
if (a[*i]->deref_type != nir_deref_type_array_wildcard)
result &= ~nir_derefs_a_contains_b_bit;
} else {
assert(a[*i]->deref_type == nir_deref_type_array &&
b[*i]->deref_type == nir_deref_type_array);
assert(a[*i]->arr.index.is_ssa && b[*i]->arr.index.is_ssa);
if (nir_src_is_const(a[*i]->arr.index) &&
nir_src_is_const(b[*i]->arr.index)) {
/* If they're both direct and have different offsets, they
* don't even alias much less anything else.
*/
if (nir_src_as_uint(a[*i]->arr.index) !=
nir_src_as_uint(b[*i]->arr.index))
return nir_derefs_do_not_alias;
} else if (a[*i]->arr.index.ssa == b[*i]->arr.index.ssa) {
/* They're the same indirect, continue on */
} else {
/* They're not the same index so we can't prove anything about
* containment.
*/
result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit);
}
}
break;
}
case nir_deref_type_struct: {
/* If they're different struct members, they don't even alias */
if (a[*i]->strct.index != b[*i]->strct.index)
return nir_derefs_do_not_alias;
break;
}
default:
unreachable("Invalid deref type");
}
}
/* If a is longer than b, then it can't contain b. If neither a[i] nor
* b[i] are NULL then we aren't at the end of the chain and we know nothing
* about containment.
*/
if (a[*i] != NULL)
result &= ~nir_derefs_a_contains_b_bit;
if (b[*i] != NULL)
result &= ~nir_derefs_b_contains_a_bit;
/* If a contains b and b contains a they must be equal. */
if ((result & nir_derefs_a_contains_b_bit) &&
(result & nir_derefs_b_contains_a_bit))
result |= nir_derefs_equal_bit;
return result;
}
static bool
is_interface_struct_deref(const nir_deref_instr *deref)
{
if (deref->deref_type == nir_deref_type_struct) {
assert(glsl_type_is_struct_or_ifc(nir_deref_instr_parent(deref)->type));
return true;
} else {
return false;
}
}
nir_deref_compare_result
nir_compare_deref_paths(nir_deref_path *a_path,
nir_deref_path *b_path)
{
if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes))
return nir_derefs_do_not_alias;
if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
return nir_derefs_may_alias_bit;
unsigned path_idx = 1;
if (a_path->path[0]->deref_type == nir_deref_type_var) {
const nir_variable *a_var = a_path->path[0]->var;
const nir_variable *b_var = b_path->path[0]->var;
/* If we got here, the two variables must have the same mode. The
* only way modes_may_alias() can return true for two different modes
* is if one is global and the other ssbo. However, global variables
* only exist in OpenCL and SSBOs don't exist there. No API allows
* both for variables.
*/
assert(a_var->data.mode == b_var->data.mode);
switch (a_var->data.mode) {
case nir_var_mem_ssbo: {
nir_deref_compare_result binding_compare;
if (a_var == b_var) {
binding_compare = compare_deref_paths(a_path, b_path, &path_idx,
is_interface_struct_deref);
} else {
binding_compare = nir_derefs_do_not_alias;
}
if (binding_compare & nir_derefs_equal_bit)
break;
/* If the binding derefs can't alias and at least one is RESTRICT,
* then we know they can't alias.
*/
if (!(binding_compare & nir_derefs_may_alias_bit) &&
((a_var->data.access & ACCESS_RESTRICT) ||
(b_var->data.access & ACCESS_RESTRICT)))
return nir_derefs_do_not_alias;
return nir_derefs_may_alias_bit;
}
case nir_var_mem_shared:
if (a_var == b_var)
break;
/* Per SPV_KHR_workgroup_memory_explicit_layout and
* GL_EXT_shared_memory_block, shared blocks alias each other.
* We will have either all blocks or all non-blocks.
*/
if (glsl_type_is_interface(a_var->type) ||
glsl_type_is_interface(b_var->type)) {
assert(glsl_type_is_interface(a_var->type) &&
glsl_type_is_interface(b_var->type));
return nir_derefs_may_alias_bit;
}
/* Otherwise, distinct shared vars don't alias */
return nir_derefs_do_not_alias;
default:
/* For any other variable types, if we can chase them back to the
* variable, and the variables are different, they don't alias.
*/
if (a_var == b_var)
break;
return nir_derefs_do_not_alias;
}
} else {
assert(a_path->path[0]->deref_type == nir_deref_type_cast);
/* If they're not exactly the same cast, it's hard to compare them so we
* just assume they alias. Comparing casts is tricky as there are lots
* of things such as mode, type, etc. to make sure work out; for now, we
* just assume nir_opt_deref will combine them and compare the deref
* instructions.
*
* TODO: At some point in the future, we could be clever and understand
* that a float[] and int[] have the same layout and aliasing structure
* but double[] and vec3[] do not and we could potentially be a bit
* smarter here.
*/
if (a_path->path[0] != b_path->path[0])
return nir_derefs_may_alias_bit;
}
return compare_deref_paths(a_path, b_path, &path_idx, NULL);
}
nir_deref_compare_result
nir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b)
{
if (a == b) {
return nir_derefs_equal_bit | nir_derefs_may_alias_bit |
nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit;
}
nir_deref_path a_path, b_path;
nir_deref_path_init(&a_path, a, NULL);
nir_deref_path_init(&b_path, b, NULL);
assert(a_path.path[0]->deref_type == nir_deref_type_var ||
a_path.path[0]->deref_type == nir_deref_type_cast);
assert(b_path.path[0]->deref_type == nir_deref_type_var ||
b_path.path[0]->deref_type == nir_deref_type_cast);
nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path);
nir_deref_path_finish(&a_path);
nir_deref_path_finish(&b_path);
return result;
}
nir_deref_path *nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref)
{
if (!deref->_path) {
deref->_path = ralloc(mem_ctx, nir_deref_path);
nir_deref_path_init(deref->_path, deref->instr, mem_ctx);
}
return deref->_path;
}
nir_deref_compare_result nir_compare_derefs_and_paths(void *mem_ctx,
nir_deref_and_path *a,
nir_deref_and_path *b)
{
if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */
return nir_compare_derefs(a->instr, b->instr);
return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a),
nir_get_deref_path(mem_ctx, b));
}
struct rematerialize_deref_state {
bool progress;
nir_builder builder;
nir_block *block;
struct hash_table *cache;
};
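/* Return an equivalent deref in state->block: derefs already there are
* returned as-is, everything else is cloned (parents included), with
* clones cached per block.
*/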
static nir_deref_instr *
rematerialize_deref_in_block(nir_deref_instr *deref,
struct rematerialize_deref_state *state)
{
if (deref->instr.block == state->block)
return deref;
if (!state->cache) {
state->cache = _mesa_pointer_hash_table_create(NULL);
}
struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref);
if (cached)
return cached->data;
nir_builder *b = &state->builder;
nir_deref_instr *new_deref =
nir_deref_instr_create(b->shader, deref->deref_type);
new_deref->modes = deref->modes;
new_deref->type = deref->type;
if (deref->deref_type == nir_deref_type_var) {
new_deref->var = deref->var;
} else {
nir_deref_instr *parent = nir_src_as_deref(deref->parent);
if (parent) {
parent = rematerialize_deref_in_block(parent, state);
new_deref->parent = nir_src_for_ssa(&parent->dest.ssa);
} else {
nir_src_copy(&new_deref->parent, &deref->parent, &new_deref->instr);
}
}
switch (deref->deref_type) {
case nir_deref_type_var:
case nir_deref_type_array_wildcard:
/* Nothing more to do */
break;
case nir_deref_type_cast:
new_deref->cast.ptr_stride = deref->cast.ptr_stride;
new_deref->cast.align_mul = deref->cast.align_mul;
new_deref->cast.align_offset = deref->cast.align_offset;
break;
case nir_deref_type_array:
case nir_deref_type_ptr_as_array:
assert(!nir_src_as_deref(deref->arr.index));
nir_src_copy(&new_deref->arr.index, &deref->arr.index, &new_deref->instr);
break;
case nir_deref_type_struct:
new_deref->strct.index = deref->strct.index;
break;
default:
unreachable("Invalid deref instruction type");
}
nir_ssa_dest_init(&new_deref->instr, &new_deref->dest,
deref->dest.ssa.num_components,
deref->dest.ssa.bit_size,
NULL);
nir_builder_instr_insert(b, &new_deref->instr);
/* Record the clone so later lookups of this deref in this block hit the
* cache instead of cloning it again.
*/
_mesa_hash_table_insert(state->cache, deref, new_deref);
return new_deref;
}
static bool
rematerialize_deref_src(nir_src *src, void *_state)
{
struct rematerialize_deref_state *state = _state;
nir_deref_instr *deref = nir_src_as_deref(*src);
if (!deref)
return true;
nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
if (block_deref != deref) {
nir_instr_rewrite_src(src->parent_instr, src,
nir_src_for_ssa(&block_deref->dest.ssa));
nir_deref_instr_remove_if_unused(deref);
state->progress = true;
}
return true;
}
/** Re-materialize derefs in every block
*
* This pass re-materializes deref instructions in every block in which it is
* used. After this pass has been run, every use of a deref will be of a
* deref in the same block as the use. Also, all unused derefs will be
* deleted as a side-effect.
*
* Derefs used as sources of phi instructions are not rematerialized.
*/
bool
nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
{
struct rematerialize_deref_state state = { 0 };
nir_builder_init(&state.builder, impl);
nir_foreach_block_unstructured(block, impl) {
state.block = block;
/* Start each block with a fresh cache */
if (state.cache)
_mesa_hash_table_clear(state.cache, NULL);
nir_foreach_instr_safe(instr, block) {
if (instr->type == nir_instr_type_deref &&
nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
continue;
/* If a deref is used in a phi, we can't rematerialize it, as the new
* derefs would appear before the phi, which is not valid.
*/
if (instr->type == nir_instr_type_phi)
continue;
state.builder.cursor = nir_before_instr(instr);
nir_foreach_src(instr, rematerialize_deref_src, &state);
}
#ifndef NDEBUG
nir_if *following_if = nir_block_get_following_if(block);
if (following_if)
assert(!nir_src_as_deref(following_if->condition));
#endif
}
_mesa_hash_table_destroy(state.cache, NULL);
return state.progress;
}
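/* After a parent deref's type has been rewritten, recompute each child
* deref's type from its new parent, recursing until a cast stops the
* chain (casts define their own type).
*/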
static void
nir_deref_instr_fixup_child_types(nir_deref_instr *parent)
{
nir_foreach_use(use, &parent->dest.ssa) {
if (use->parent_instr->type != nir_instr_type_deref)
continue;
nir_deref_instr *child = nir_instr_as_deref(use->parent_instr);
switch (child->deref_type) {
case nir_deref_type_var:
unreachable("nir_deref_type_var cannot be a child");
case nir_deref_type_array:
case nir_deref_type_array_wildcard:
child->type = glsl_get_array_element(parent->type);
break;
case nir_deref_type_ptr_as_array:
child->type = parent->type;
break;
case nir_deref_type_struct:
child->type = glsl_get_struct_field(parent->type,
child->strct.index);
break;
case nir_deref_type_cast:
/* We stop the recursion here */
continue;
}
/* Recurse into children */
nir_deref_instr_fixup_child_types(child);
}
}
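/* An ALU instruction consumes only the raw pointer value, and a deref_cast
* never changes those bits, so ALU sources can look through casts to the
* cast's parent.
*/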
static bool
opt_alu_of_cast(nir_alu_instr *alu)
{
bool progress = false;
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
assert(alu->src[i].src.is_ssa);
nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;
if (src_instr->type != nir_instr_type_deref)
continue;
nir_deref_instr *src_deref = nir_instr_as_deref(src_instr);
if (src_deref->deref_type != nir_deref_type_cast)
continue;
assert(src_deref->parent.is_ssa);
nir_instr_rewrite_src_ssa(&alu->instr, &alu->src[i].src,
src_deref->parent.ssa);
progress = true;
}
return progress;
}
static bool
is_trivial_array_deref_cast(nir_deref_instr *cast)
{
assert(is_trivial_deref_cast(cast));
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (parent->deref_type == nir_deref_type_array) {
return cast->cast.ptr_stride ==
glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type);
} else if (parent->deref_type == nir_deref_type_ptr_as_array) {
return cast->cast.ptr_stride ==
nir_deref_instr_array_stride(parent);
} else {
return false;
}
}
static bool
is_deref_ptr_as_array(nir_instr *instr)
{
return instr->type == nir_instr_type_deref &&
nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array;
}
static bool
opt_remove_restricting_cast_alignments(nir_deref_instr *cast)
{
assert(cast->deref_type == nir_deref_type_cast);
if (cast->cast.align_mul == 0)
return false;
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (parent == NULL)
return false;
/* Don't use any default alignment for this check. We don't want to fall
* back to type alignment too early in case we find out later that we're
* somehow a child of a packed struct.
*/
uint32_t parent_mul, parent_offset;
if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */,
&parent_mul, &parent_offset))
return false;
/* If this cast increases the alignment, we want to keep it.
*
* There is a possibility that the larger alignment provided by this cast
* somehow disagrees with the smaller alignment further up the deref chain.
* In that case, we choose to favor the alignment closer to the actual
* memory operation which, in this case, is the cast and not its parent so
* keeping the cast alignment is the right thing to do.
*/
if (parent_mul < cast->cast.align_mul)
return false;
/* If we've gotten here, we have a parent deref with an align_mul at least
* as large as ours so we can potentially throw away the alignment
* information on this deref. There are two cases to consider here:
*
* 1. We can chase the deref all the way back to the variable. In this
* case, we have "perfect" knowledge, modulo indirect array derefs.
* Unless we've done something wrong in our indirect/wildcard stride
* calculations, our knowledge from the deref walk is better than the
* client's.
*
* 2. We can't chase it all the way back to the variable. In this case,
* because our call to nir_get_explicit_deref_align(parent, ...) above
* passes default_to_type_align=false, the only way we can even
* get here is if something further up the deref chain has a cast with
* an alignment which can only happen if we get an alignment from the
* client (most likely a decoration in the SPIR-V). If the client has
* provided us with two conflicting alignments in the deref chain,
* that's their fault and we can do whatever we want.
*
* In either case, we should be well within our rights, at this point, to throw
* away the alignment information on this deref. However, to be "nice" to
* weird clients, we do one more check. It really shouldn't happen but
* it's possible that the parent's alignment offset disagrees with the
* cast's alignment offset. In this case, we consider the cast as
* providing more information (or at least more valid information) and keep
* it even if the align_mul from the parent is larger.
*/
assert(cast->cast.align_mul <= parent_mul);
if (parent_offset % cast->cast.align_mul != cast->cast.align_offset)
return false;
/* If we got here, the parent has better alignment information than the
* child and we can get rid of the child alignment information.
*/
cast->cast.align_mul = 0;
cast->cast.align_offset = 0;
return true;
}
/**
* Remove casts that just wrap other casts.
*/
static bool
opt_remove_cast_cast(nir_deref_instr *cast)
{
nir_deref_instr *parent = nir_deref_instr_parent(cast);
if (parent == NULL || parent->deref_type != nir_deref_type_cast)
return false;
/* Copy align info from the parent cast if needed
*
* In the case that align_mul = 0, the alignment for this cast is inherited
* from the parent deref (if any). If we aren't careful, removing our
* parent cast from the chain may lose alignment information so we need to
* copy the parent's alignment information (if any).
*
* opt_remove_restricting_cast_alignments() above is run before this pass
* and will have cleared our alignment (set align_mul = 0) in the case
* where the parent's alignment information is somehow superior.
*/
if (cast->cast.align_mul == 0) {
cast->cast.align_mul = parent->cast.align_mul;
cast->cast.align_offset = parent->cast.align_offset;
}
nir_instr_rewrite_src(&cast->instr, &cast->parent,
nir_src_for_ssa(parent->parent.ssa));
return true;
}
/* Restrict variable modes in casts.
*
* If we know from something higher up the deref chain that the deref has a
* specific mode, we can cast to more general and back but we can never cast
* across modes. For non-cast derefs, we should only ever do anything here if
* the parent eventually comes from a cast that we restricted earlier.
*/
static bool
opt_restrict_deref_modes(nir_deref_instr *deref)
{
if (deref->deref_type == nir_deref_type_var) {
assert(deref->modes == deref->var->data.mode);
return false;
}
nir_deref_instr *parent = nir_src_as_deref(deref->parent);
if (parent == NULL || parent->modes == deref->modes)
return false;
assert(parent->modes & deref->modes);
deref->modes &= parent->modes;
return true;
}
static bool
opt_remove_sampler_cast(nir_deref_instr *cast)
{
assert(cast->deref_type == nir_deref_type_cast);
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (parent == NULL)
return false;
/* Strip both types down to their non-array type and bail if there are any
* discrepancies in array lengths.
*/
const struct glsl_type *parent_type = parent->type;
const struct glsl_type *cast_type = cast->type;
while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) {
if (glsl_get_length(parent_type) != glsl_get_length(cast_type))
return false;
parent_type = glsl_get_array_element(parent_type);
cast_type = glsl_get_array_element(cast_type);
}
if (!glsl_type_is_sampler(parent_type))
return false;
if (cast_type != glsl_bare_sampler_type() &&
(glsl_type_is_bare_sampler(parent_type) ||
cast_type != glsl_sampler_type_to_texture(parent_type)))
return false;
/* We're a cast from a more detailed sampler type to a bare sampler or a
* texture type with the same dimensionality.
*/
nir_ssa_def_rewrite_uses(&cast->dest.ssa,
&parent->dest.ssa);
nir_instr_remove(&cast->instr);
/* Recursively crawl the deref tree and clean up types */
nir_deref_instr_fixup_child_types(parent);
return true;
}
/**
* Is this casting a struct to a contained struct.
* struct a { struct b field0 };
* ssa_5 is structa;
* deref_cast (structb *)ssa_5 (function_temp structb);
* converts to
* deref_struct &ssa_5->field0 (function_temp structb);
* This allows subsequent copy propagation to work.
*/
static bool
opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
{
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (!parent)
return false;
if (cast->cast.align_mul > 0)
return false;
if (!glsl_type_is_struct(parent->type))
return false;
/* Empty struct */
if (glsl_get_length(parent->type) < 1)
return false;
if (glsl_get_struct_field_offset(parent->type, 0) != 0)
return false;
const struct glsl_type *field_type = glsl_get_struct_field(parent->type, 0);
if (cast->type != field_type)
return false;
/* we can't drop the stride information */
if (cast->cast.ptr_stride != glsl_get_explicit_stride(field_type))
return false;
nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
nir_ssa_def_rewrite_uses(&cast->dest.ssa, &replace->dest.ssa);
nir_deref_instr_remove_if_unused(cast);
return true;
}
static bool
opt_deref_cast(nir_builder *b, nir_deref_instr *cast)
{
bool progress = false;
progress |= opt_remove_restricting_cast_alignments(cast);
if (opt_replace_struct_wrapper_cast(b, cast))
return true;
if (opt_remove_sampler_cast(cast))
return true;
progress |= opt_remove_cast_cast(cast);
if (!is_trivial_deref_cast(cast))
return progress;
/* If this deref still contains useful alignment information, we don't want
* to delete it.
*/
if (cast->cast.align_mul > 0)
return progress;
bool trivial_array_cast = is_trivial_array_deref_cast(cast);
assert(cast->dest.is_ssa);
assert(cast->parent.is_ssa);
nir_foreach_use_including_if_safe(use_src, &cast->dest.ssa) {
assert(!use_src->is_if && "there cannot be if-uses");
/* If this isn't a trivial array cast, we can't propagate into
* ptr_as_array derefs.
*/
if (is_deref_ptr_as_array(use_src->parent_instr) &&
!trivial_array_cast)
continue;
nir_instr_rewrite_src(use_src->parent_instr, use_src, cast->parent);
progress = true;
}
if (nir_deref_instr_remove_if_unused(cast))
progress = true;
return progress;
}
static bool
opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
{
assert(deref->deref_type == nir_deref_type_ptr_as_array);
nir_deref_instr *parent = nir_deref_instr_parent(deref);
if (nir_src_is_const(deref->arr.index) &&
nir_src_as_int(deref->arr.index) == 0) {
/* If it's a ptr_as_array deref with an index of 0, it does nothing
* and we can just replace its uses with its parent, unless it has
* alignment information.
*
* The source of a ptr_as_array deref always has a deref_type of
* nir_deref_type_array or nir_deref_type_cast. If it's a cast, it
* may be trivial and we may be able to get rid of that too. Any
* trivial cast of trivial cast cases should be handled already by
* opt_deref_cast() above.
*/
if (parent->deref_type == nir_deref_type_cast &&
parent->cast.align_mul == 0 &&
is_trivial_deref_cast(parent))
parent = nir_deref_instr_parent(parent);
nir_ssa_def_rewrite_uses(&deref->dest.ssa,
&parent->dest.ssa);
nir_instr_remove(&deref->instr);
return true;
}
if (parent->deref_type != nir_deref_type_array &&
parent->deref_type != nir_deref_type_ptr_as_array)
return false;
assert(parent->parent.is_ssa);
assert(parent->arr.index.is_ssa);
assert(deref->arr.index.is_ssa);
deref->arr.in_bounds &= parent->arr.in_bounds;
nir_ssa_def *new_idx = nir_iadd(b, parent->arr.index.ssa,
deref->arr.index.ssa);
deref->deref_type = parent->deref_type;
nir_instr_rewrite_src(&deref->instr, &deref->parent, parent->parent);
nir_instr_rewrite_src(&deref->instr, &deref->arr.index,
nir_src_for_ssa(new_idx));
return true;
}
static bool
is_vector_bitcast_deref(nir_deref_instr *cast,
nir_component_mask_t mask,
bool is_write)
{
if (cast->deref_type != nir_deref_type_cast)
return false;
/* Don't throw away useful alignment information */
if (cast->cast.align_mul > 0)
return false;
/* It has to be a cast of another deref */
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (parent == NULL)
return false;
/* The parent has to be a vector or scalar */
if (!glsl_type_is_vector_or_scalar(parent->type))
return false;
/* Don't bother with 1-bit types */
unsigned cast_bit_size = glsl_get_bit_size(cast->type);
unsigned parent_bit_size = glsl_get_bit_size(parent->type);
if (cast_bit_size == 1 || parent_bit_size == 1)
return false;
/* A strided vector type means it's not tightly packed */
if (glsl_get_explicit_stride(cast->type) ||
glsl_get_explicit_stride(parent->type))
return false;
assert(cast_bit_size > 0 && cast_bit_size % 8 == 0);
assert(parent_bit_size > 0 && parent_bit_size % 8 == 0);
unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8);
unsigned parent_bytes = glsl_get_vector_elements(parent->type) *
(parent_bit_size / 8);
if (bytes_used > parent_bytes)
return false;
if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size,
parent_bit_size))
return false;
return true;
}
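/* Shrink or grow a vector with a single swizzle; lanes added beyond the
* source width replicate component 0, which is harmless because callers
* only consume the meaningful lanes (or mask them off in stores).
*/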
static nir_ssa_def *
resize_vector(nir_builder *b, nir_ssa_def *data, unsigned num_components)
{
if (num_components == data->num_components)
return data;
unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++)
swiz[i] = i;
return nir_swizzle(b, data, swiz, num_components);
}
static bool
opt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load)
{
nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
nir_component_mask_t read_mask =
nir_ssa_def_components_read(&load->dest.ssa);
/* LLVM loves to take advantage of the fact that vec3s in OpenCL are
* vec4-aligned and so it can just read/write them as vec4s. This
* results in a LOT of vec4->vec3 casts on loads and stores.
*/
if (is_vector_bitcast_deref(deref, read_mask, false)) {
const unsigned old_num_comps = load->dest.ssa.num_components;
const unsigned old_bit_size = load->dest.ssa.bit_size;
nir_deref_instr *parent = nir_src_as_deref(deref->parent);
const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
const unsigned new_bit_size = glsl_get_bit_size(parent->type);
/* Stomp it to reference the parent */
nir_instr_rewrite_src(&load->instr, &load->src[0],
nir_src_for_ssa(&parent->dest.ssa));
assert(load->dest.is_ssa);
load->dest.ssa.bit_size = new_bit_size;
load->dest.ssa.num_components = new_num_comps;
load->num_components = new_num_comps;
b->cursor = nir_after_instr(&load->instr);
nir_ssa_def *data = &load->dest.ssa;
if (old_bit_size != new_bit_size)
data = nir_bitcast_vector(b, &load->dest.ssa, old_bit_size);
data = resize_vector(b, data, old_num_comps);
nir_ssa_def_rewrite_uses_after(&load->dest.ssa, data,
data->parent_instr);
return true;
}
return false;
}
static bool
opt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store)
{
nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);
/* LLVM loves to take advantage of the fact that vec3s in OpenCL are
* vec4-aligned and so it can just read/write them as vec4s. This
* results in a LOT of vec4->vec3 casts on loads and stores.
*/
if (is_vector_bitcast_deref(deref, write_mask, true)) {
assert(store->src[1].is_ssa);
nir_ssa_def *data = store->src[1].ssa;
const unsigned old_bit_size = data->bit_size;
nir_deref_instr *parent = nir_src_as_deref(deref->parent);
const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
const unsigned new_bit_size = glsl_get_bit_size(parent->type);
nir_instr_rewrite_src(&store->instr, &store->src[0],
nir_src_for_ssa(&parent->dest.ssa));
/* Restrict things down as needed so the bitcast doesn't fail */
data = nir_channels(b, data, (1 << util_last_bit(write_mask)) - 1);
if (old_bit_size != new_bit_size)
data = nir_bitcast_vector(b, data, new_bit_size);
data = resize_vector(b, data, new_num_comps);
nir_instr_rewrite_src(&store->instr, &store->src[1],
nir_src_for_ssa(data));
store->num_components = new_num_comps;
/* Adjust the write mask */
write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size,
new_bit_size);
nir_intrinsic_set_write_mask(store, write_mask);
return true;
}
return false;
}
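/* A deref_mode_is intrinsic whose answer is statically known from the
* deref's mode set folds to a constant boolean.
*/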
static bool
opt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin)
{
nir_variable_mode modes = nir_intrinsic_memory_modes(intrin);
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
if (deref == NULL)
return false;
nir_ssa_def *deref_is = NULL;
if (nir_deref_mode_must_be(deref, modes))
deref_is = nir_imm_true(b);
if (!nir_deref_mode_may_be(deref, modes))
deref_is = nir_imm_false(b);
if (deref_is == NULL)
return false;
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, deref_is);
nir_instr_remove(&intrin->instr);
return true;
}
bool
nir_opt_deref_impl(nir_function_impl *impl)
{
bool progress = false;
nir_builder b;
nir_builder_init(&b, impl);
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block) {
b.cursor = nir_before_instr(instr);
switch (instr->type) {
case nir_instr_type_alu: {
nir_alu_instr *alu = nir_instr_as_alu(instr);
if (opt_alu_of_cast(alu))
progress = true;
break;
}
case nir_instr_type_deref: {
nir_deref_instr *deref = nir_instr_as_deref(instr);
if (opt_restrict_deref_modes(deref))
progress = true;
switch (deref->deref_type) {
case nir_deref_type_ptr_as_array:
if (opt_deref_ptr_as_array(&b, deref))
progress = true;
break;
case nir_deref_type_cast:
if (opt_deref_cast(&b, deref))
progress = true;
break;
default:
/* Do nothing */
break;
}
break;
}
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_deref:
if (opt_load_vec_deref(&b, intrin))
progress = true;
break;
case nir_intrinsic_store_deref:
if (opt_store_vec_deref(&b, intrin))
progress = true;
break;
case nir_intrinsic_deref_mode_is:
if (opt_known_deref_mode_is(&b, intrin))
progress = true;
break;
default:
/* Do nothing */
break;
}
break;
}
default:
/* Do nothing */
break;
}
}
}
if (progress) {
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
} else {
nir_metadata_preserve(impl, nir_metadata_all);
}
return progress;
}
bool
nir_opt_deref(nir_shader *shader)
{
bool progress = false;
nir_foreach_function(func, shader) {
if (func->impl && nir_opt_deref_impl(func->impl))
progress = true;
}
return progress;
}