third_party_mesa3d/src/compiler/nir/nir_deref.c
Alyssa Rosenzweig 7f6491b76d nir: Combine if_uses with instruction uses
Every nir_ssa_def heads two chains of uses (regular instruction uses and
if-uses), each implemented with a doubly linked list. Each list requires
2 * 64-bit = 16 bytes per def, which is memory intensive. Together they
require 32 bytes per def. Not cool.

To cut that memory use in half, we can combine the two linked lists into a
single use list that contains both regular instruction uses and if-uses. To do
this, we augment nir_src with a boolean "is_if" and reimplement the
abstract if-uses operations on top of that list. That boolean should fit into
the padding already in nir_src, so it should not actually affect memory use;
in the future we can sneak it into the bottom bit of a pointer.

However, this creates a new inefficiency: now iterating over regular uses
separate from if-uses is (nominally) more expensive. It turns out virtually
every caller of nir_foreach_if_use(_safe) also calls nir_foreach_use(_safe)
immediately before, so we rewrite most of the callers to instead call a new
single `nir_foreach_use_including_if(_safe)` which predicates the logic based on
`src->is_if`. This should mitigate the performance difference.

There's a bit of churn, but this is largely a mechanical set of changes.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22343>
2023-04-07 23:48:03 +00:00
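
To make the rewritten iteration pattern concrete, here is a minimal sketch
(not part of the patch itself) of what a converted caller can look like,
assuming only the combined iterator and the "is_if" flag described above;
visit_if_use() and visit_instr_use() are hypothetical callbacks standing in
for whatever the caller does with each kind of use:

/* Hypothetical caller: one walk over the combined use list instead of
 * nir_foreach_use followed by nir_foreach_if_use.
 */
static void
visit_all_uses(nir_ssa_def *def)
{
   nir_foreach_use_including_if(src, def) {
      if (src->is_if)
         visit_if_use(src->parent_if);       /* use as an if-condition */
      else
         visit_instr_use(src->parent_instr); /* regular instruction use */
   }
}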


/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "util/hash_table.h"
static bool
is_trivial_deref_cast(nir_deref_instr *cast)
{
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (!parent)
return false;
return cast->modes == parent->modes &&
cast->type == parent->type &&
cast->dest.ssa.num_components == parent->dest.ssa.num_components &&
cast->dest.ssa.bit_size == parent->dest.ssa.bit_size;
}
void
nir_deref_path_init(nir_deref_path *path,
nir_deref_instr *deref, void *mem_ctx)
{
assert(deref != NULL);
/* The length of the short path is at most ARRAY_SIZE - 1 because we need
* room for the NULL terminator.
*/
static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;
int count = 0;
nir_deref_instr **tail = &path->_short_path[max_short_path_len];
nir_deref_instr **head = tail;
*tail = NULL;
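/* First pass: walk from the deref back to its root, counting the chain
* (trivial casts are skipped) and filling the short path tail-first while
* it still fits.
*/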
for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
continue;
count++;
if (count <= max_short_path_len)
*(--head) = d;
}
if (count <= max_short_path_len) {
/* If we're under max_short_path_len, just use the short path. */
path->path = head;
goto done;
}
#ifndef NDEBUG
/* Just in case someone uses short_path by accident */
for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++)
path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef;
#endif
path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
head = tail = path->path + count;
*tail = NULL;
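/* Second pass: the chain did not fit in _short_path, so repeat the same
* walk into the heap-allocated array.
*/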
for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
continue;
*(--head) = d;
}
done:
assert(head == path->path);
assert(tail == head + count);
assert(*tail == NULL);
}
void
nir_deref_path_finish(nir_deref_path *path)
{
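/* path points into _short_path for short chains; only heap-allocated
* paths need freeing.
*/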
if (path->path < &path->_short_path[0] ||
path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
ralloc_free(path->path);
}
/**
* Recursively removes unused deref instructions
*/
bool
nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
{
bool progress = false;
for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
/* If anyone is using this deref, leave it alone */
assert(d->dest.is_ssa);
if (!nir_ssa_def_is_unused(&d->dest.ssa))
break;
nir_instr_remove(&d->instr);
progress = true;
}
return progress;
}
bool
nir_deref_instr_has_indirect(nir_deref_instr *instr)
{
while (instr->deref_type != nir_deref_type_var) {
/* Consider casts to be indirects */
if (instr->deref_type == nir_deref_type_cast)
return true;
if ((instr->deref_type == nir_deref_type_array ||
instr->deref_type == nir_deref_type_ptr_as_array) &&
!nir_src_is_const(instr->arr.index))
return true;
instr = nir_deref_instr_parent(instr);
}
return false;
}
bool
nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
{
for (; instr; instr = nir_deref_instr_parent(instr)) {
if (instr->deref_type == nir_deref_type_array &&
nir_src_is_const(instr->arr.index) &&
nir_src_as_uint(instr->arr.index) >=
glsl_get_length(nir_deref_instr_parent(instr)->type))
return true;
}
return false;
}
bool
nir_deref_instr_has_complex_use(nir_deref_instr *deref,
nir_deref_instr_has_complex_use_options opts)
{
nir_foreach_use_including_if(use_src, &deref->dest.ssa) {
if (use_src->is_if)
return true;
nir_instr *use_instr = use_src->parent_instr;
switch (use_instr->type) {
case nir_instr_type_deref: {
nir_deref_instr *use_deref = nir_instr_as_deref(use_instr);
/* A var deref has no sources */
assert(use_deref->deref_type != nir_deref_type_var);
/* If a deref shows up in an array index or something like that, it's
* a complex use.
*/
if (use_src != &use_deref->parent)
return true;
/* Anything that isn't a basic struct or array deref is considered to
* be a "complex" use. In particular, we don't allow ptr_as_array
* because we assume that opt_deref will turn any non-complex
* ptr_as_array derefs into regular array derefs eventually so passes
* which only want to handle simple derefs will pick them up in a
* later pass.
*/
if (use_deref->deref_type != nir_deref_type_struct &&
use_deref->deref_type != nir_deref_type_array_wildcard &&
use_deref->deref_type != nir_deref_type_array)
return true;
if (nir_deref_instr_has_complex_use(use_deref, opts))
return true;
continue;
}
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
switch (use_intrin->intrinsic) {
case nir_intrinsic_load_deref:
assert(use_src == &use_intrin->src[0]);
continue;
case nir_intrinsic_copy_deref:
assert(use_src == &use_intrin->src[0] ||
use_src == &use_intrin->src[1]);
continue;
case nir_intrinsic_store_deref:
/* A use in src[1] of a store means we're taking that pointer and
* writing it to a variable. Because we have no idea who will
* read that variable and what they will do with the pointer, it's
* considered a "complex" use. A use in src[0], on the other
* hand, is a simple use because we're just going to dereference
* it and write a value there.
*/
if (use_src == &use_intrin->src[0])
continue;
return true;
case nir_intrinsic_memcpy_deref:
if (use_src == &use_intrin->src[0] &&
(opts & nir_deref_instr_has_complex_use_allow_memcpy_dst))
continue;
if (use_src == &use_intrin->src[1] &&
(opts & nir_deref_instr_has_complex_use_allow_memcpy_src))
continue;
return true;
default:
return true;
}
unreachable("Switch default failed");
}
default:
return true;
}
}
return false;
}
static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
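/* Booleans are stored as 32-bit values in memory, regardless of their
* 1-bit SSA representation.
*/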
assert(glsl_type_is_vector_or_scalar(type) ||
glsl_type_is_matrix(type));
return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}
unsigned
nir_deref_instr_array_stride(nir_deref_instr *deref)
{
switch (deref->deref_type) {
case nir_deref_type_array:
case nir_deref_type_array_wildcard: {
const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type;
unsigned stride = glsl_get_explicit_stride(arr_type);
if ((glsl_type_is_matrix(arr_type) &&
glsl_matrix_type_is_row_major(arr_type)) ||
(glsl_type_is_vector(arr_type) && stride == 0))
stride = type_scalar_size_bytes(arr_type);
return stride;
}
case nir_deref_type_ptr_as_array:
return nir_deref_instr_array_stride(nir_deref_instr_parent(deref));
case nir_deref_type_cast:
return deref->cast.ptr_stride;
default:
return 0;
}
}
static unsigned
type_get_array_stride(const struct glsl_type *elem_type,
glsl_type_size_align_func size_align)
{
unsigned elem_size, elem_align;
size_align(elem_type, &elem_size, &elem_align);
return ALIGN_POT(elem_size, elem_align);
}
static unsigned
struct_type_get_field_offset(const struct glsl_type *struct_type,
glsl_type_size_align_func size_align,
unsigned field_idx)
{
assert(glsl_type_is_struct_or_ifc(struct_type));
unsigned offset = 0;
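/* Walk the fields up to and including field_idx, aligning at each step;
* the sizes of the preceding fields accumulate into the final offset.
*/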
for (unsigned i = 0; i <= field_idx; i++) {
unsigned elem_size, elem_align;
size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align);
offset = ALIGN_POT(offset, elem_align);
if (i < field_idx)
offset += elem_size;
}
return offset;
}
unsigned
nir_deref_instr_get_const_offset(nir_deref_instr *deref,
glsl_type_size_align_func size_align)
{
nir_deref_path path;
nir_deref_path_init(&path, deref, NULL);
unsigned offset = 0;
for (nir_deref_instr **p = &path.path[1]; *p; p++) {
switch ((*p)->deref_type) {
case nir_deref_type_array:
offset += nir_src_as_uint((*p)->arr.index) *
type_get_array_stride((*p)->type, size_align);
break;
case nir_deref_type_struct: {
/* p starts at path[1], so this is safe */
nir_deref_instr *parent = *(p - 1);
offset += struct_type_get_field_offset(parent->type, size_align,
(*p)->strct.index);
break;
}
case nir_deref_type_cast:
/* A cast doesn't contribute to the offset */
break;
default:
unreachable("Unsupported deref type");
}
}
nir_deref_path_finish(&path);
return offset;
}
nir_ssa_def *
nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
glsl_type_size_align_func size_align)
{
nir_deref_path path;
nir_deref_path_init(&path, deref, NULL);
nir_ssa_def *offset = nir_imm_intN_t(b, 0, deref->dest.ssa.bit_size);
for (nir_deref_instr **p = &path.path[1]; *p; p++) {
switch ((*p)->deref_type) {
case nir_deref_type_array:
case nir_deref_type_ptr_as_array: {
nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
int stride = type_get_array_stride((*p)->type, size_align);
offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
break;
}
case nir_deref_type_struct: {
/* p starts at path[1], so this is safe */
nir_deref_instr *parent = *(p - 1);
unsigned field_offset =
struct_type_get_field_offset(parent->type, size_align,
(*p)->strct.index);
offset = nir_iadd_imm(b, offset, field_offset);
break;
}
case nir_deref_type_cast:
/* A cast doesn't contribute to the offset */
break;
default:
unreachable("Unsupported deref type");
}
}
nir_deref_path_finish(&path);
return offset;
}
bool
nir_remove_dead_derefs_impl(nir_function_impl *impl)
{
bool progress = false;
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type == nir_instr_type_deref &&
nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
progress = true;
}
}
if (progress) {
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
} else {
nir_metadata_preserve(impl, nir_metadata_all);
}
return progress;
}
bool
nir_remove_dead_derefs(nir_shader *shader)
{
bool progress = false;
nir_foreach_function(function, shader) {
if (function->impl && nir_remove_dead_derefs_impl(function->impl))
progress = true;
}
return progress;
}
void
nir_fixup_deref_modes(nir_shader *shader)
{
nir_foreach_function(function, shader) {
if (!function->impl)
continue;
nir_foreach_block(block, function->impl) {
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_deref)
continue;
nir_deref_instr *deref = nir_instr_as_deref(instr);
if (deref->deref_type == nir_deref_type_cast)
continue;
nir_variable_mode parent_modes;
if (deref->deref_type == nir_deref_type_var) {
parent_modes = deref->var->data.mode;
} else {
assert(deref->parent.is_ssa);
nir_deref_instr *parent =
nir_instr_as_deref(deref->parent.ssa->parent_instr);
parent_modes = parent->modes;
}
deref->modes = parent_modes;
}
}
}
}
static bool
modes_may_alias(nir_variable_mode a, nir_variable_mode b)
{
/* Generic pointers can alias with SSBOs */
if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) &&
(b & (nir_var_mem_ssbo | nir_var_mem_global)))
return true;
/* Pointers can only alias if they share a mode. */
return a & b;
}
ALWAYS_INLINE static nir_deref_compare_result
compare_deref_paths(nir_deref_path *a_path, nir_deref_path *b_path,
unsigned *i, bool (*stop_fn)(const nir_deref_instr *))
{
/* Start off assuming they fully compare. We ignore equality for now. In
* the end, we'll determine that by containment.
*/
nir_deref_compare_result result = nir_derefs_may_alias_bit |
nir_derefs_a_contains_b_bit |
nir_derefs_b_contains_a_bit;
nir_deref_instr **a = a_path->path;
nir_deref_instr **b = b_path->path;
for (; a[*i] != NULL; (*i)++) {
if (a[*i] != b[*i])
break;
if (stop_fn && stop_fn(a[*i]))
break;
}
/* We're at either the tail or the divergence point between the two deref
* paths. Look to see if either contains cast or a ptr_as_array deref. If
* it does we don't know how to safely make any inferences. Hopefully,
* nir_opt_deref will clean most of these up and we can start inferring
* things again.
*
* In theory, we could do a bit better. For instance, we could detect the
* case where we have exactly one ptr_as_array deref in the chain after the
* divergence point and it's matched in both chains and the two chains have
* different constant indices.
*/
for (unsigned j = *i; a[j] != NULL; j++) {
if (stop_fn && stop_fn(a[j]))
break;
if (a[j]->deref_type == nir_deref_type_cast ||
a[j]->deref_type == nir_deref_type_ptr_as_array)
return nir_derefs_may_alias_bit;
}
for (unsigned j = *i; b[j] != NULL; j++) {
if (stop_fn && stop_fn(b[j]))
break;
if (b[j]->deref_type == nir_deref_type_cast ||
b[j]->deref_type == nir_deref_type_ptr_as_array)
return nir_derefs_may_alias_bit;
}
for (; a[*i] != NULL && b[*i] != NULL; (*i)++) {
if (stop_fn && (stop_fn(a[*i]) || stop_fn(b[*i])))
break;
switch (a[*i]->deref_type) {
case nir_deref_type_array:
case nir_deref_type_array_wildcard: {
assert(b[*i]->deref_type == nir_deref_type_array ||
b[*i]->deref_type == nir_deref_type_array_wildcard);
if (a[*i]->deref_type == nir_deref_type_array_wildcard) {
if (b[*i]->deref_type != nir_deref_type_array_wildcard)
result &= ~nir_derefs_b_contains_a_bit;
} else if (b[*i]->deref_type == nir_deref_type_array_wildcard) {
if (a[*i]->deref_type != nir_deref_type_array_wildcard)
result &= ~nir_derefs_a_contains_b_bit;
} else {
assert(a[*i]->deref_type == nir_deref_type_array &&
b[*i]->deref_type == nir_deref_type_array);
assert(a[*i]->arr.index.is_ssa && b[*i]->arr.index.is_ssa);
if (nir_src_is_const(a[*i]->arr.index) &&
nir_src_is_const(b[*i]->arr.index)) {
/* If they're both direct and have different offsets, they
* don't even alias much less anything else.
*/
if (nir_src_as_uint(a[*i]->arr.index) !=
nir_src_as_uint(b[*i]->arr.index))
return nir_derefs_do_not_alias;
} else if (a[*i]->arr.index.ssa == b[*i]->arr.index.ssa) {
/* They're the same indirect, continue on */
} else {
/* They're not the same index so we can't prove anything about
* containment.
*/
result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit);
}
}
break;
}
case nir_deref_type_struct: {
/* If they're different struct members, they don't even alias */
if (a[*i]->strct.index != b[*i]->strct.index)
return nir_derefs_do_not_alias;
break;
}
default:
unreachable("Invalid deref type");
}
}
/* If a is longer than b, then it can't contain b. If neither a[i] nor
* b[i] are NULL then we aren't at the end of the chain and we know nothing
* about containment.
*/
if (a[*i] != NULL)
result &= ~nir_derefs_a_contains_b_bit;
if (b[*i] != NULL)
result &= ~nir_derefs_b_contains_a_bit;
/* If a contains b and b contains a they must be equal. */
if ((result & nir_derefs_a_contains_b_bit) &&
(result & nir_derefs_b_contains_a_bit))
result |= nir_derefs_equal_bit;
return result;
}
static bool
is_interface_struct_deref(const nir_deref_instr *deref)
{
if (deref->deref_type == nir_deref_type_struct) {
assert(glsl_type_is_struct_or_ifc(nir_deref_instr_parent(deref)->type));
return true;
} else {
return false;
}
}
nir_deref_compare_result
nir_compare_deref_paths(nir_deref_path *a_path,
nir_deref_path *b_path)
{
if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes))
return nir_derefs_do_not_alias;
if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
return nir_derefs_may_alias_bit;
unsigned path_idx = 1;
if (a_path->path[0]->deref_type == nir_deref_type_var) {
const nir_variable *a_var = a_path->path[0]->var;
const nir_variable *b_var = b_path->path[0]->var;
/* If we got here, the two variables must have the same mode. The
* only way modes_may_alias() can return true for two different modes
* is if one is global and the other ssbo. However, global variables
* only exist in OpenCL and SSBOs don't exist there. No API allows
* both for variables.
*/
assert(a_var->data.mode == b_var->data.mode);
switch (a_var->data.mode) {
case nir_var_mem_ssbo: {
nir_deref_compare_result binding_compare;
if (a_var == b_var) {
binding_compare = compare_deref_paths(a_path, b_path, &path_idx,
is_interface_struct_deref);
} else {
binding_compare = nir_derefs_do_not_alias;
}
if (binding_compare & nir_derefs_equal_bit)
break;
/* If the binding derefs can't alias and at least one is RESTRICT,
* then we know they can't alias.
*/
if (!(binding_compare & nir_derefs_may_alias_bit) &&
((a_var->data.access & ACCESS_RESTRICT) ||
(b_var->data.access & ACCESS_RESTRICT)))
return nir_derefs_do_not_alias;
return nir_derefs_may_alias_bit;
}
case nir_var_mem_shared:
if (a_var == b_var)
break;
/* Per SPV_KHR_workgroup_memory_explicit_layout and
* GL_EXT_shared_memory_block, shared blocks alias each other.
* We will have either all blocks or all non-blocks.
*/
if (glsl_type_is_interface(a_var->type) ||
glsl_type_is_interface(b_var->type)) {
assert(glsl_type_is_interface(a_var->type) &&
glsl_type_is_interface(b_var->type));
return nir_derefs_may_alias_bit;
}
/* Otherwise, distinct shared vars don't alias */
return nir_derefs_do_not_alias;
default:
/* For any other variable types, if we can chase them back to the
* variable, and the variables are different, they don't alias.
*/
if (a_var == b_var)
break;
return nir_derefs_do_not_alias;
}
} else {
assert(a_path->path[0]->deref_type == nir_deref_type_cast);
/* If they're not exactly the same cast, it's hard to compare them so we
* just assume they alias. Comparing casts is tricky as there are lots
* of things such as mode, type, etc. to make sure work out; for now, we
* just assume nir_opt_deref will combine them and compare the deref
* instructions.
*
* TODO: At some point in the future, we could be clever and understand
* that a float[] and int[] have the same layout and aliasing structure
* but double[] and vec3[] do not and we could potentially be a bit
* smarter here.
*/
if (a_path->path[0] != b_path->path[0])
return nir_derefs_may_alias_bit;
}
return compare_deref_paths(a_path, b_path, &path_idx, NULL);
}
nir_deref_compare_result
nir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b)
{
if (a == b) {
return nir_derefs_equal_bit | nir_derefs_may_alias_bit |
nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit;
}
nir_deref_path a_path, b_path;
nir_deref_path_init(&a_path, a, NULL);
nir_deref_path_init(&b_path, b, NULL);
assert(a_path.path[0]->deref_type == nir_deref_type_var ||
a_path.path[0]->deref_type == nir_deref_type_cast);
assert(b_path.path[0]->deref_type == nir_deref_type_var ||
b_path.path[0]->deref_type == nir_deref_type_cast);
nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path);
nir_deref_path_finish(&a_path);
nir_deref_path_finish(&b_path);
return result;
}
nir_deref_path *nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref)
{
if (!deref->_path) {
deref->_path = ralloc(mem_ctx, nir_deref_path);
nir_deref_path_init(deref->_path, deref->instr, mem_ctx);
}
return deref->_path;
}
nir_deref_compare_result nir_compare_derefs_and_paths(void *mem_ctx,
nir_deref_and_path *a,
nir_deref_and_path *b)
{
if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */
return nir_compare_derefs(a->instr, b->instr);
return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a),
nir_get_deref_path(mem_ctx, b));
}
struct rematerialize_deref_state {
bool progress;
nir_builder builder;
nir_block *block;
struct hash_table *cache;
};
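/* Return an equivalent deref in state->block: derefs already there are
* returned as-is, everything else is cloned (parents included), with
* clones cached per block.
*/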
static nir_deref_instr *
rematerialize_deref_in_block(nir_deref_instr *deref,
struct rematerialize_deref_state *state)
{
if (deref->instr.block == state->block)
return deref;
if (!state->cache) {
state->cache = _mesa_pointer_hash_table_create(NULL);
}
struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref);
if (cached)
return cached->data;
nir_builder *b = &state->builder;
nir_deref_instr *new_deref =
nir_deref_instr_create(b->shader, deref->deref_type);
new_deref->modes = deref->modes;
new_deref->type = deref->type;
if (deref->deref_type == nir_deref_type_var) {
new_deref->var = deref->var;
} else {
nir_deref_instr *parent = nir_src_as_deref(deref->parent);
if (parent) {
parent = rematerialize_deref_in_block(parent, state);
new_deref->parent = nir_src_for_ssa(&parent->dest.ssa);
} else {
nir_src_copy(&new_deref->parent, &deref->parent, &new_deref->instr);
}
}
switch (deref->deref_type) {
case nir_deref_type_var:
case nir_deref_type_array_wildcard:
/* Nothing more to do */
break;
case nir_deref_type_cast:
new_deref->cast.ptr_stride = deref->cast.ptr_stride;
new_deref->cast.align_mul = deref->cast.align_mul;
new_deref->cast.align_offset = deref->cast.align_offset;
break;
case nir_deref_type_array:
case nir_deref_type_ptr_as_array:
assert(!nir_src_as_deref(deref->arr.index));
nir_src_copy(&new_deref->arr.index, &deref->arr.index, &new_deref->instr);
break;
case nir_deref_type_struct:
new_deref->strct.index = deref->strct.index;
break;
default:
unreachable("Invalid deref instruction type");
}
nir_ssa_dest_init(&new_deref->instr, &new_deref->dest,
deref->dest.ssa.num_components,
deref->dest.ssa.bit_size,
NULL);
nir_builder_instr_insert(b, &new_deref->instr);
/* Record the clone so later lookups of this deref in this block hit the
* cache instead of cloning it again.
*/
_mesa_hash_table_insert(state->cache, deref, new_deref);
return new_deref;
}
static bool
rematerialize_deref_src(nir_src *src, void *_state)
{
struct rematerialize_deref_state *state = _state;
nir_deref_instr *deref = nir_src_as_deref(*src);
if (!deref)
return true;
nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
if (block_deref != deref) {
nir_instr_rewrite_src(src->parent_instr, src,
nir_src_for_ssa(&block_deref->dest.ssa));
nir_deref_instr_remove_if_unused(deref);
state->progress = true;
}
return true;
}
/** Re-materialize derefs in every block
*
* This pass re-materializes deref instructions in every block in which it is
* used. After this pass has been run, every use of a deref will be of a
* deref in the same block as the use. Also, all unused derefs will be
* deleted as a side-effect.
*
* Derefs used as sources of phi instructions are not rematerialized.
*/
bool
nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
{
struct rematerialize_deref_state state = { 0 };
nir_builder_init(&state.builder, impl);
nir_foreach_block_unstructured(block, impl) {
state.block = block;
/* Start each block with a fresh cache */
if (state.cache)
_mesa_hash_table_clear(state.cache, NULL);
nir_foreach_instr_safe(instr, block) {
if (instr->type == nir_instr_type_deref &&
nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
continue;
/* If a deref is used in a phi, we can't rematerialize it, as the new
* derefs would appear before the phi, which is not valid.
*/
if (instr->type == nir_instr_type_phi)
continue;
state.builder.cursor = nir_before_instr(instr);
nir_foreach_src(instr, rematerialize_deref_src, &state);
}
#ifndef NDEBUG
nir_if *following_if = nir_block_get_following_if(block);
if (following_if)
assert(!nir_src_as_deref(following_if->condition));
#endif
}
_mesa_hash_table_destroy(state.cache, NULL);
return state.progress;
}
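/* After a parent deref's type has been rewritten, recompute each child
* deref's type from its new parent, recursing until a cast stops the
* chain (casts define their own type).
*/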
static void
nir_deref_instr_fixup_child_types(nir_deref_instr *parent)
{
nir_foreach_use(use, &parent->dest.ssa) {
if (use->parent_instr->type != nir_instr_type_deref)
continue;
nir_deref_instr *child = nir_instr_as_deref(use->parent_instr);
switch (child->deref_type) {
case nir_deref_type_var:
unreachable("nir_deref_type_var cannot be a child");
case nir_deref_type_array:
case nir_deref_type_array_wildcard:
child->type = glsl_get_array_element(parent->type);
break;
case nir_deref_type_ptr_as_array:
child->type = parent->type;
break;
case nir_deref_type_struct:
child->type = glsl_get_struct_field(parent->type,
child->strct.index);
break;
case nir_deref_type_cast:
/* We stop the recursion here */
continue;
}
/* Recurse into children */
nir_deref_instr_fixup_child_types(child);
}
}
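/* An ALU instruction consumes only the raw pointer value, and a deref_cast
* never changes those bits, so ALU sources can look through casts to the
* cast's parent.
*/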
static bool
opt_alu_of_cast(nir_alu_instr *alu)
{
bool progress = false;
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
assert(alu->src[i].src.is_ssa);
nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;
if (src_instr->type != nir_instr_type_deref)
continue;
nir_deref_instr *src_deref = nir_instr_as_deref(src_instr);
if (src_deref->deref_type != nir_deref_type_cast)
continue;
assert(src_deref->parent.is_ssa);
nir_instr_rewrite_src_ssa(&alu->instr, &alu->src[i].src,
src_deref->parent.ssa);
progress = true;
}
return progress;
}
static bool
is_trivial_array_deref_cast(nir_deref_instr *cast)
{
assert(is_trivial_deref_cast(cast));
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (parent->deref_type == nir_deref_type_array) {
return cast->cast.ptr_stride ==
glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type);
} else if (parent->deref_type == nir_deref_type_ptr_as_array) {
return cast->cast.ptr_stride ==
nir_deref_instr_array_stride(parent);
} else {
return false;
}
}
static bool
is_deref_ptr_as_array(nir_instr *instr)
{
return instr->type == nir_instr_type_deref &&
nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array;
}
static bool
opt_remove_restricting_cast_alignments(nir_deref_instr *cast)
{
assert(cast->deref_type == nir_deref_type_cast);
if (cast->cast.align_mul == 0)
return false;
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (parent == NULL)
return false;
/* Don't use any default alignment for this check. We don't want to fall
* back to type alignment too early in case we find out later that we're
* somehow a child of a packed struct.
*/
uint32_t parent_mul, parent_offset;
if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */,
&parent_mul, &parent_offset))
return false;
/* If this cast increases the alignment, we want to keep it.
*
* There is a possibility that the larger alignment provided by this cast
* somehow disagrees with the smaller alignment further up the deref chain.
* In that case, we choose to favor the alignment closer to the actual
* memory operation which, in this case, is the cast and not its parent so
* keeping the cast alignment is the right thing to do.
*/
if (parent_mul < cast->cast.align_mul)
return false;
/* If we've gotten here, we have a parent deref with an align_mul at least
* as large as ours so we can potentially throw away the alignment
* information on this deref. There are two cases to consider here:
*
* 1. We can chase the deref all the way back to the variable. In this
* case, we have "perfect" knowledge, modulo indirect array derefs.
* Unless we've done something wrong in our indirect/wildcard stride
* calculations, our knowledge from the deref walk is better than the
* client's.
*
* 2. We can't chase it all the way back to the variable. In this case,
* because our call to nir_get_explicit_deref_align(parent, ...) above
* passes default_to_type_align=false, the only way we can even
* get here is if something further up the deref chain has a cast with
* an alignment which can only happen if we get an alignment from the
* client (most likely a decoration in the SPIR-V). If the client has
* provided us with two conflicting alignments in the deref chain,
* that's their fault and we can do whatever we want.
*
* In either case, we should be well within our rights, at this point, to throw
* away the alignment information on this deref. However, to be "nice" to
* weird clients, we do one more check. It really shouldn't happen but
* it's possible that the parent's alignment offset disagrees with the
* cast's alignment offset. In this case, we consider the cast as
* providing more information (or at least more valid information) and keep
* it even if the align_mul from the parent is larger.
*/
assert(cast->cast.align_mul <= parent_mul);
if (parent_offset % cast->cast.align_mul != cast->cast.align_offset)
return false;
/* If we got here, the parent has better alignment information than the
* child and we can get rid of the child alignment information.
*/
cast->cast.align_mul = 0;
cast->cast.align_offset = 0;
return true;
}
/**
* Remove casts that just wrap other casts.
*/
static bool
opt_remove_cast_cast(nir_deref_instr *cast)
{
nir_deref_instr *parent = nir_deref_instr_parent(cast);
if (parent == NULL || parent->deref_type != nir_deref_type_cast)
return false;
/* Copy align info from the parent cast if needed
*
* In the case that align_mul = 0, the alignment for this cast is inherited
* from the parent deref (if any). If we aren't careful, removing our
* parent cast from the chain may lose alignment information so we need to
* copy the parent's alignment information (if any).
*
* opt_remove_restricting_cast_alignments() above is run before this pass
* and will have cleared our alignment (set align_mul = 0) in the case
* where the parent's alignment information is somehow superior.
*/
if (cast->cast.align_mul == 0) {
cast->cast.align_mul = parent->cast.align_mul;
cast->cast.align_offset = parent->cast.align_offset;
}
nir_instr_rewrite_src(&cast->instr, &cast->parent,
nir_src_for_ssa(parent->parent.ssa));
return true;
}
/* Restrict variable modes in casts.
*
* If we know from something higher up the deref chain that the deref has a
* specific mode, we can cast to more general and back but we can never cast
* across modes. For non-cast derefs, we should only ever do anything here if
* the parent eventually comes from a cast that we restricted earlier.
*/
static bool
opt_restrict_deref_modes(nir_deref_instr *deref)
{
if (deref->deref_type == nir_deref_type_var) {
assert(deref->modes == deref->var->data.mode);
return false;
}
nir_deref_instr *parent = nir_src_as_deref(deref->parent);
if (parent == NULL || parent->modes == deref->modes)
return false;
assert(parent->modes & deref->modes);
deref->modes &= parent->modes;
return true;
}
static bool
opt_remove_sampler_cast(nir_deref_instr *cast)
{
assert(cast->deref_type == nir_deref_type_cast);
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (parent == NULL)
return false;
/* Strip both types down to their non-array type and bail if there are any
* discrepancies in array lengths.
*/
const struct glsl_type *parent_type = parent->type;
const struct glsl_type *cast_type = cast->type;
while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) {
if (glsl_get_length(parent_type) != glsl_get_length(cast_type))
return false;
parent_type = glsl_get_array_element(parent_type);
cast_type = glsl_get_array_element(cast_type);
}
if (!glsl_type_is_sampler(parent_type))
return false;
if (cast_type != glsl_bare_sampler_type() &&
(glsl_type_is_bare_sampler(parent_type) ||
cast_type != glsl_sampler_type_to_texture(parent_type)))
return false;
/* We're a cast from a more detailed sampler type to a bare sampler or a
* texture type with the same dimensionality.
*/
nir_ssa_def_rewrite_uses(&cast->dest.ssa,
&parent->dest.ssa);
nir_instr_remove(&cast->instr);
/* Recursively crawl the deref tree and clean up types */
nir_deref_instr_fixup_child_types(parent);
return true;
}
/**
* Is this casting a struct to a contained struct.
* struct a { struct b field0 };
* ssa_5 is structa;
* deref_cast (structb *)ssa_5 (function_temp structb);
* converts to
* deref_struct &ssa_5->field0 (function_temp structb);
* This allows subsequent copy propagation to work.
*/
static bool
opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
{
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (!parent)
return false;
if (cast->cast.align_mul > 0)
return false;
if (!glsl_type_is_struct(parent->type))
return false;
/* Empty struct */
if (glsl_get_length(parent->type) < 1)
return false;
if (glsl_get_struct_field_offset(parent->type, 0) != 0)
return false;
const struct glsl_type *field_type = glsl_get_struct_field(parent->type, 0);
if (cast->type != field_type)
return false;
/* we can't drop the stride information */
if (cast->cast.ptr_stride != glsl_get_explicit_stride(field_type))
return false;
nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
nir_ssa_def_rewrite_uses(&cast->dest.ssa, &replace->dest.ssa);
nir_deref_instr_remove_if_unused(cast);
return true;
}
static bool
opt_deref_cast(nir_builder *b, nir_deref_instr *cast)
{
bool progress = false;
progress |= opt_remove_restricting_cast_alignments(cast);
if (opt_replace_struct_wrapper_cast(b, cast))
return true;
if (opt_remove_sampler_cast(cast))
return true;
progress |= opt_remove_cast_cast(cast);
if (!is_trivial_deref_cast(cast))
return progress;
/* If this deref still contains useful alignment information, we don't want
* to delete it.
*/
if (cast->cast.align_mul > 0)
return progress;
bool trivial_array_cast = is_trivial_array_deref_cast(cast);
assert(cast->dest.is_ssa);
assert(cast->parent.is_ssa);
nir_foreach_use_including_if_safe(use_src, &cast->dest.ssa) {
assert(!use_src->is_if && "there cannot be if-uses");
/* If this isn't a trivial array cast, we can't propagate into
* ptr_as_array derefs.
*/
if (is_deref_ptr_as_array(use_src->parent_instr) &&
!trivial_array_cast)
continue;
nir_instr_rewrite_src(use_src->parent_instr, use_src, cast->parent);
progress = true;
}
if (nir_deref_instr_remove_if_unused(cast))
progress = true;
return progress;
}
static bool
opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
{
assert(deref->deref_type == nir_deref_type_ptr_as_array);
nir_deref_instr *parent = nir_deref_instr_parent(deref);
if (nir_src_is_const(deref->arr.index) &&
nir_src_as_int(deref->arr.index) == 0) {
/* If it's a ptr_as_array deref with an index of 0, it does nothing
* and we can just replace its uses with its parent, unless it has
* alignment information.
*
* The source of a ptr_as_array deref always has a deref_type of
* nir_deref_type_array or nir_deref_type_cast. If it's a cast, it
* may be trivial and we may be able to get rid of that too. Any
* trivial cast of trivial cast cases should be handled already by
* opt_deref_cast() above.
*/
if (parent->deref_type == nir_deref_type_cast &&
parent->cast.align_mul == 0 &&
is_trivial_deref_cast(parent))
parent = nir_deref_instr_parent(parent);
nir_ssa_def_rewrite_uses(&deref->dest.ssa,
&parent->dest.ssa);
nir_instr_remove(&deref->instr);
return true;
}
if (parent->deref_type != nir_deref_type_array &&
parent->deref_type != nir_deref_type_ptr_as_array)
return false;
assert(parent->parent.is_ssa);
assert(parent->arr.index.is_ssa);
assert(deref->arr.index.is_ssa);
deref->arr.in_bounds &= parent->arr.in_bounds;
nir_ssa_def *new_idx = nir_iadd(b, parent->arr.index.ssa,
deref->arr.index.ssa);
deref->deref_type = parent->deref_type;
nir_instr_rewrite_src(&deref->instr, &deref->parent, parent->parent);
nir_instr_rewrite_src(&deref->instr, &deref->arr.index,
nir_src_for_ssa(new_idx));
return true;
}
static bool
is_vector_bitcast_deref(nir_deref_instr *cast,
nir_component_mask_t mask,
bool is_write)
{
if (cast->deref_type != nir_deref_type_cast)
return false;
/* Don't throw away useful alignment information */
if (cast->cast.align_mul > 0)
return false;
/* It has to be a cast of another deref */
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (parent == NULL)
return false;
/* The parent has to be a vector or scalar */
if (!glsl_type_is_vector_or_scalar(parent->type))
return false;
/* Don't bother with 1-bit types */
unsigned cast_bit_size = glsl_get_bit_size(cast->type);
unsigned parent_bit_size = glsl_get_bit_size(parent->type);
if (cast_bit_size == 1 || parent_bit_size == 1)
return false;
/* A strided vector type means it's not tightly packed */
if (glsl_get_explicit_stride(cast->type) ||
glsl_get_explicit_stride(parent->type))
return false;
assert(cast_bit_size > 0 && cast_bit_size % 8 == 0);
assert(parent_bit_size > 0 && parent_bit_size % 8 == 0);
unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8);
unsigned parent_bytes = glsl_get_vector_elements(parent->type) *
(parent_bit_size / 8);
if (bytes_used > parent_bytes)
return false;
if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size,
parent_bit_size))
return false;
return true;
}
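/* Shrink or grow a vector with a single swizzle; lanes added beyond the
* source width replicate component 0, which is harmless because callers
* only consume the meaningful lanes (or mask them off in stores).
*/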
static nir_ssa_def *
resize_vector(nir_builder *b, nir_ssa_def *data, unsigned num_components)
{
if (num_components == data->num_components)
return data;
unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++)
swiz[i] = i;
return nir_swizzle(b, data, swiz, num_components);
}
static bool
opt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load)
{
nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
nir_component_mask_t read_mask =
nir_ssa_def_components_read(&load->dest.ssa);
/* LLVM loves to take advantage of the fact that vec3s in OpenCL are
* vec4-aligned and so it can just read/write them as vec4s. This
* results in a LOT of vec4->vec3 casts on loads and stores.
*/
if (is_vector_bitcast_deref(deref, read_mask, false)) {
const unsigned old_num_comps = load->dest.ssa.num_components;
const unsigned old_bit_size = load->dest.ssa.bit_size;
nir_deref_instr *parent = nir_src_as_deref(deref->parent);
const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
const unsigned new_bit_size = glsl_get_bit_size(parent->type);
/* Stomp it to reference the parent */
nir_instr_rewrite_src(&load->instr, &load->src[0],
nir_src_for_ssa(&parent->dest.ssa));
assert(load->dest.is_ssa);
load->dest.ssa.bit_size = new_bit_size;
load->dest.ssa.num_components = new_num_comps;
load->num_components = new_num_comps;
b->cursor = nir_after_instr(&load->instr);
nir_ssa_def *data = &load->dest.ssa;
if (old_bit_size != new_bit_size)
data = nir_bitcast_vector(b, &load->dest.ssa, old_bit_size);
data = resize_vector(b, data, old_num_comps);
nir_ssa_def_rewrite_uses_after(&load->dest.ssa, data,
data->parent_instr);
return true;
}
return false;
}
static bool
opt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store)
{
nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);
/* LLVM loves to take advantage of the fact that vec3s in OpenCL are
* vec4-aligned and so it can just read/write them as vec4s. This
* results in a LOT of vec4->vec3 casts on loads and stores.
*/
if (is_vector_bitcast_deref(deref, write_mask, true)) {
assert(store->src[1].is_ssa);
nir_ssa_def *data = store->src[1].ssa;
const unsigned old_bit_size = data->bit_size;
nir_deref_instr *parent = nir_src_as_deref(deref->parent);
const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
const unsigned new_bit_size = glsl_get_bit_size(parent->type);
nir_instr_rewrite_src(&store->instr, &store->src[0],
nir_src_for_ssa(&parent->dest.ssa));
/* Restrict things down as needed so the bitcast doesn't fail */
data = nir_channels(b, data, (1 << util_last_bit(write_mask)) - 1);
if (old_bit_size != new_bit_size)
data = nir_bitcast_vector(b, data, new_bit_size);
data = resize_vector(b, data, new_num_comps);
nir_instr_rewrite_src(&store->instr, &store->src[1],
nir_src_for_ssa(data));
store->num_components = new_num_comps;
/* Adjust the write mask */
write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size,
new_bit_size);
nir_intrinsic_set_write_mask(store, write_mask);
return true;
}
return false;
}
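/* A deref_mode_is intrinsic whose answer is statically known from the
* deref's mode set folds to a constant boolean.
*/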
static bool
opt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin)
{
nir_variable_mode modes = nir_intrinsic_memory_modes(intrin);
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
if (deref == NULL)
return false;
nir_ssa_def *deref_is = NULL;
if (nir_deref_mode_must_be(deref, modes))
deref_is = nir_imm_true(b);
if (!nir_deref_mode_may_be(deref, modes))
deref_is = nir_imm_false(b);
if (deref_is == NULL)
return false;
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, deref_is);
nir_instr_remove(&intrin->instr);
return true;
}
bool
nir_opt_deref_impl(nir_function_impl *impl)
{
bool progress = false;
nir_builder b;
nir_builder_init(&b, impl);
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block) {
b.cursor = nir_before_instr(instr);
switch (instr->type) {
case nir_instr_type_alu: {
nir_alu_instr *alu = nir_instr_as_alu(instr);
if (opt_alu_of_cast(alu))
progress = true;
break;
}
case nir_instr_type_deref: {
nir_deref_instr *deref = nir_instr_as_deref(instr);
if (opt_restrict_deref_modes(deref))
progress = true;
switch (deref->deref_type) {
case nir_deref_type_ptr_as_array:
if (opt_deref_ptr_as_array(&b, deref))
progress = true;
break;
case nir_deref_type_cast:
if (opt_deref_cast(&b, deref))
progress = true;
break;
default:
/* Do nothing */
break;
}
break;
}
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_deref:
if (opt_load_vec_deref(&b, intrin))
progress = true;
break;
case nir_intrinsic_store_deref:
if (opt_store_vec_deref(&b, intrin))
progress = true;
break;
case nir_intrinsic_deref_mode_is:
if (opt_known_deref_mode_is(&b, intrin))
progress = true;
break;
default:
/* Do nothing */
break;
}
break;
}
default:
/* Do nothing */
break;
}
}
}
if (progress) {
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
} else {
nir_metadata_preserve(impl, nir_metadata_all);
}
return progress;
}
bool
nir_opt_deref(nir_shader *shader)
{
bool progress = false;
nir_foreach_function(func, shader) {
if (func->impl && nir_opt_deref_impl(func->impl))
progress = true;
}
return progress;
}