From 15640e58d96c5db0cd78769a06b6b204dcd60799 Mon Sep 17 00:00:00 2001
From: Rhys Perry <pendingchaos02@gmail.com>
Date: Thu, 12 Aug 2021 15:36:56 +0100
Subject: [PATCH] radv,aco: lower texture descriptor loads in NIR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fossil-db (Sienna Cichlid):
Totals from 39445 (24.30% of 162293) affected shaders:
MaxWaves: 875988 -> 875972 (-0.00%)
Instrs: 35372561 -> 35234909 (-0.39%); split: -0.41%, +0.03%
CodeSize: 190237480 -> 189379240 (-0.45%); split: -0.47%, +0.02%
VGPRs: 1889856 -> 1889928 (+0.00%); split: -0.00%, +0.01%
SpillSGPRs: 10764 -> 10857 (+0.86%); split: -2.04%, +2.91%
SpillVGPRs: 1891 -> 1907 (+0.85%); split: -0.32%, +1.16%
Scratch: 260096 -> 261120 (+0.39%)
Latency: 477701150 -> 477578466 (-0.03%); split: -0.06%, +0.03%
InvThroughput: 87819847 -> 87830346 (+0.01%); split: -0.03%, +0.04%
VClause: 673353 -> 673829 (+0.07%); split: -0.04%, +0.11%
SClause: 1385396 -> 1366478 (-1.37%); split: -1.65%, +0.29%
Copies: 2327965 -> 2229134 (-4.25%); split: -4.58%, +0.34%
Branches: 906707 -> 906434 (-0.03%); split: -0.13%, +0.10%
PreSGPRs: 1874153 -> 1862698 (-0.61%); split: -1.34%, +0.73%
PreVGPRs: 1691382 -> 1691383 (+0.00%); split: -0.00%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12773>
---
 src/amd/common/ac_shader_util.h               |  11 +
 .../compiler/aco_instruction_selection.cpp    |  88 ++------
 src/amd/llvm/ac_nir_to_llvm.c                 |   8 +-
 src/amd/llvm/ac_shader_abi.h                  |  17 +-
 .../vulkan/radv_nir_apply_pipeline_layout.c   | 211 +++++++++++++++++-
 src/amd/vulkan/radv_nir_to_llvm.c             | 111 +--------
 src/amd/vulkan/radv_shader.c                  |   1 -
 src/amd/vulkan/radv_shader.h                  |   1 -
 src/gallium/drivers/radeonsi/si_shader_llvm.c |   1 -
 9 files changed, 257 insertions(+), 192 deletions(-)

diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h
index 5d7ee8f7e73..129f6ebe93d 100644
--- a/src/amd/common/ac_shader_util.h
+++ b/src/amd/common/ac_shader_util.h
@@ -79,6 +79,17 @@ enum ac_fetch_format
    AC_FETCH_FORMAT_NONE,
 };
 
+enum ac_descriptor_type
+{
+   AC_DESC_IMAGE,   /* default resource descriptor for a texture instruction */
+   AC_DESC_FMASK,   /* chosen for fragment_mask_fetch_amd / samples_identical */
+   AC_DESC_SAMPLER, /* descriptor fetched for the sampler source */
+   AC_DESC_BUFFER,  /* chosen when sampler_dim is GLSL_SAMPLER_DIM_BUF */
+   AC_DESC_PLANE_0, /* PLANE_0..PLANE_2 must stay consecutive: callers */
+   AC_DESC_PLANE_1, /* compute the value as AC_DESC_PLANE_0 + plane */
+   AC_DESC_PLANE_2,
+};
+
 unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask);
 
 unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format);
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 504b54253fe..cf5f1ade632 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -1355,7 +1355,9 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
    case nir_op_vec2:
    case nir_op_vec3:
    case nir_op_vec4:
-   case nir_op_vec5: {
+   case nir_op_vec5:
+   case nir_op_vec8:
+   case nir_op_vec16: {
       std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
       unsigned num = instr->dest.dest.ssa.num_components;
       for (unsigned i = 0; i < num; ++i)
@@ -8967,70 +8969,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
    }
 }
 
-void
-tex_fetch_ptrs(isel_context* ctx, nir_tex_instr* instr, Temp* res_ptr, Temp* samp_ptr,
-               enum glsl_base_type* stype)
-{
-   nir_deref_instr* texture_deref_instr = NULL;
-   nir_deref_instr* sampler_deref_instr = NULL;
-   int plane = -1;
-
-   for (unsigned i = 0; i < instr->num_srcs; i++) {
-      switch (instr->src[i].src_type) {
-      case nir_tex_src_texture_deref:
-         texture_deref_instr = nir_src_as_deref(instr->src[i].src);
-         break;
-      case nir_tex_src_sampler_deref:
-         sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
-         break;
-      case nir_tex_src_plane: plane = nir_src_as_int(instr->src[i].src); break;
-      default: break;
-      }
-   }
-
-   *stype = glsl_get_sampler_result_type(texture_deref_instr->type);
-
-   if (!sampler_deref_instr)
-      sampler_deref_instr = texture_deref_instr;
-
-   if (plane >= 0) {
-      assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);
-      *res_ptr = get_sampler_desc(ctx, texture_deref_instr,
-                                  (aco_descriptor_type)(ACO_DESC_PLANE_0 + plane), instr, false);
-   } else if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
-      *res_ptr = get_sampler_desc(ctx, texture_deref_instr, ACO_DESC_BUFFER, instr, false);
-   } else if (instr->op == nir_texop_fragment_mask_fetch_amd) {
-      *res_ptr = get_sampler_desc(ctx, texture_deref_instr, ACO_DESC_FMASK, instr, false);
-   } else {
-      *res_ptr = get_sampler_desc(ctx, texture_deref_instr, ACO_DESC_IMAGE, instr, false);
-   }
-   if (samp_ptr) {
-      *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, ACO_DESC_SAMPLER, instr, false);
-
-      if (ctx->options->disable_aniso_single_level &&
-          instr->sampler_dim < GLSL_SAMPLER_DIM_RECT && ctx->options->chip_class < GFX8) {
-         /* fix sampler aniso on SI/CI: samp[0] = samp[0] & img[7] */
-         Builder bld(ctx->program, ctx->block);
-
-         /* to avoid unnecessary moves, we split and recombine sampler and image */
-         Temp img[8] = {bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), bld.tmp(s1),
-                        bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), bld.tmp(s1)};
-         Temp samp[4] = {bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), bld.tmp(s1)};
-         bld.pseudo(aco_opcode::p_split_vector, Definition(img[0]), Definition(img[1]),
-                    Definition(img[2]), Definition(img[3]), Definition(img[4]), Definition(img[5]),
-                    Definition(img[6]), Definition(img[7]), *res_ptr);
-         bld.pseudo(aco_opcode::p_split_vector, Definition(samp[0]), Definition(samp[1]),
-                    Definition(samp[2]), Definition(samp[3]), *samp_ptr);
-
-         samp[0] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), samp[0], img[7]);
-         *res_ptr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s8), img[0], img[1], img[2],
-                               img[3], img[4], img[5], img[6], img[7]);
-         *samp_ptr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), samp[0], samp[1], samp[2],
-                                samp[3]);
-      }
-   }
-}
-
 void
 build_cube_select(isel_context* ctx, Temp ma, Temp id, Temp deriv, Temp* out_ma, Temp* out_sc,
                   Temp* out_tc)
@@ -9178,11 +9116,21 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
    std::vector<Temp> coords;
    std::vector<Temp> derivs;
    nir_const_value* const_offset[4] = {NULL, NULL, NULL, NULL};
-   enum glsl_base_type stype;
-   tex_fetch_ptrs(ctx, instr, &resource, &sampler, &stype);
+
+   for (unsigned i = 0; i < instr->num_srcs; i++) {
+      switch (instr->src[i].src_type) {
+      case nir_tex_src_texture_handle:
+         resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa));
+         break;
+      case nir_tex_src_sampler_handle:
+         sampler = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa));
+         break;
+      default: break;
+      }
+   }
 
    bool tg4_integer_workarounds = ctx->options->chip_class <= GFX8 && instr->op == nir_texop_tg4 &&
-                                  (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT);
+                                  (instr->dest_type & (nir_type_int | nir_type_uint));
    bool tg4_integer_cube_workaround =
       tg4_integer_workarounds && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
 
@@ -9476,7 +9424,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
                                          Operand::c32(V_008F14_IMG_DATA_FORMAT_8_8_8_8));
 
          Temp nfmt;
-         if (stype == GLSL_TYPE_UINT) {
+         if (instr->dest_type & nir_type_uint) {
             nfmt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1),
                             Operand::c32(V_008F14_IMG_NUM_FORMAT_USCALED),
                             Operand::c32(V_008F14_IMG_NUM_FORMAT_UINT), bld.scc(compare_cube_wa));
@@ -9753,7 +9701,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
       for (unsigned i = 0; i < 4; i++) {
          val[i] = emit_extract_vector(ctx, tmp_dst, i, v1);
          Temp cvt_val;
-         if (stype == GLSL_TYPE_UINT)
+         if (instr->dest_type & nir_type_uint)
             cvt_val = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), val[i]);
          else
             cvt_val = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), val[i]);
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 7b1e593d663..37e76f34f81 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -559,7 +559,7 @@ static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, struct waterfall_
 
 static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 {
-   LLVMValueRef src[4], result = NULL;
+   LLVMValueRef src[16], result = NULL;
    unsigned num_components = instr->dest.dest.ssa.num_components;
    unsigned src_components;
    LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
@@ -570,6 +570,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
    case nir_op_vec3:
    case nir_op_vec4:
    case nir_op_vec5:
+   case nir_op_vec8:
+   case nir_op_vec16:
    case nir_op_unpack_32_2x16:
    case nir_op_unpack_64_2x32:
    case nir_op_unpack_64_4x16:
@@ -957,6 +959,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
    case nir_op_vec3:
    case nir_op_vec4:
    case nir_op_vec5:
+   case nir_op_vec8:
+   case nir_op_vec16:
       for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
          src[i] = ac_to_integer(&ctx->ac, src[i]);
       result = ac_build_gather_values(&ctx->ac, src, num_components);
@@ -4486,7 +4490,7 @@ static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValue
    LLVMBuilderRef builder = ctx->ac.builder;
    LLVMValueRef img7, samp0;
 
-   if (ctx->ac.chip_class >= GFX8 || !ctx->abi->disable_aniso_single_level)
+   if (ctx->ac.chip_class >= GFX8)
       return samp;
 
    img7 = LLVMBuildExtractElement(builder, res, LLVMConstInt(ctx->ac.i32, 7, 0), "");
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h
index 0e7126831e4..2d7e553bd02 100644
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -25,6 +25,7 @@
 #define AC_SHADER_ABI_H
 
 #include "ac_shader_args.h"
+#include "ac_shader_util.h"
 #include "compiler/shader_enums.h"
 #include <llvm-c/Core.h>
 
@@ -34,17 +35,6 @@
 
 #define AC_MAX_INLINE_PUSH_CONSTS 8
 
-enum ac_descriptor_type
-{
-   AC_DESC_IMAGE,
-   AC_DESC_FMASK,
-   AC_DESC_SAMPLER,
-   AC_DESC_BUFFER,
-   AC_DESC_PLANE_0,
-   AC_DESC_PLANE_1,
-   AC_DESC_PLANE_2,
-};
-
 /* Document the shader ABI during compilation. This is what allows radeonsi and
  * radv to share a compiler backend.
  */
@@ -159,11 +149,6 @@ struct ac_shader_abi {
     */
    bool adjust_frag_coord_z;
 
-   /* Whether anisotropic filtering should be disabled for single level
-    * images.
-    */
-   bool disable_aniso_single_level;
-
    /* Whether to inline the compute dispatch size in user sgprs. */
    bool load_grid_size_from_user_sgpr;
 };
diff --git a/src/amd/vulkan/radv_nir_apply_pipeline_layout.c b/src/amd/vulkan/radv_nir_apply_pipeline_layout.c
index 3901972be64..038fc561bd9 100644
--- a/src/amd/vulkan/radv_nir_apply_pipeline_layout.c
+++ b/src/amd/vulkan/radv_nir_apply_pipeline_layout.c
@@ -21,6 +21,7 @@
  * IN THE SOFTWARE.
  *
  */
+#include "ac_shader_util.h"
 #include "nir.h"
 #include "nir_builder.h"
 #include "radv_private.h"
@@ -30,6 +31,7 @@
 typedef struct {
    enum chip_class chip_class;
    uint32_t address32_hi;
+   bool disable_aniso_single_level;
 
    const struct radv_shader_args *args;
    const struct radv_shader_info *info;
@@ -218,6 +220,122 @@ visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_ins
    nir_instr_remove(&intrin->instr);
 }
 
+/* Build the NIR value for one descriptor of the binding that `deref` refers to.
+ *
+ * `deref` is an array-deref chain ending at the resource variable, `desc_type`
+ * picks the descriptor within the binding, and only tex->op is read from `tex`.
+ * Immutable samplers are returned as constants. Otherwise this returns the
+ * descriptor's address if `non_uniform` is set, else the loaded descriptor
+ * (4 dwords for samplers and buffers, 8 dwords for everything else).
+ */
+static nir_ssa_def *
+get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref,
+                 enum ac_descriptor_type desc_type, bool non_uniform, nir_tex_instr *tex)
+{
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   assert(var);
+   unsigned desc_set = var->data.descriptor_set;
+   unsigned binding_index = var->data.binding;
+   bool indirect = nir_deref_instr_has_indirect(deref);
+
+   struct radv_descriptor_set_layout *layout = state->pipeline_layout->set[desc_set].layout;
+   struct radv_descriptor_set_binding_layout *binding = &layout->binding[binding_index];
+
+   /* Handle immutable (compile-time) samplers (VkDescriptorSetLayoutBinding::pImmutableSamplers)
+    * We can only do this for constant array index or if all samplers in the array are the same.
+    */
+   if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
+       (!indirect || binding->immutable_samplers_equal)) {
+      unsigned constant_index = 0;
+      if (!binding->immutable_samplers_equal) {
+         for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
+              d = nir_deref_instr_parent(d)) {
+            assert(d->deref_type == nir_deref_type_array);
+            unsigned array_size = MAX2(glsl_get_aoa_size(d->type), 1);
+            constant_index += nir_src_as_uint(d->arr.index) * array_size;
+         }
+      }
+
+      const uint32_t *samplers = radv_immutable_samplers(layout, binding);
+      return nir_imm_ivec4(b, samplers[constant_index * 4 + 0], samplers[constant_index * 4 + 1],
+                           samplers[constant_index * 4 + 2], samplers[constant_index * 4 + 3]);
+   }
+
+   unsigned size = 8;                 /* descriptor size in dwords */
+   unsigned offset = binding->offset; /* byte offset of the descriptor in the set */
+   switch (desc_type) {
+   case AC_DESC_IMAGE:
+   case AC_DESC_PLANE_0:
+      break;
+   case AC_DESC_FMASK:
+   case AC_DESC_PLANE_1:
+      offset += 32;
+      break;
+   case AC_DESC_SAMPLER:
+      size = 4;
+      if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+         offset += radv_combined_image_descriptor_sampler_offset(binding);
+      break;
+   case AC_DESC_BUFFER:
+      size = 4;
+      break;
+   case AC_DESC_PLANE_2:
+      size = 4;
+      offset += 64;
+      break;
+   }
+
+   /* Walk the chain with a cursor rather than `deref` itself: the PLANE_2 case
+    * below recurses with `deref` and needs the array indices to still be there. */
+   nir_ssa_def *index = NULL;
+   for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
+        d = nir_deref_instr_parent(d)) {
+      assert(d->deref_type == nir_deref_type_array);
+      unsigned array_size = MAX2(glsl_get_aoa_size(d->type), 1);
+      array_size *= binding->size;
+      nir_ssa_def *tmp = nir_imul_imm(b, d->arr.index.ssa, array_size);
+      if (tmp != d->arr.index.ssa)
+         nir_instr_as_alu(tmp->parent_instr)->no_unsigned_wrap = true;
+      if (index) {
+         tmp = nir_iadd(b, tmp, index);
+         nir_instr_as_alu(tmp->parent_instr)->no_unsigned_wrap = true;
+      }
+      index = tmp;
+   }
+
+   nir_ssa_def *index_offset = index ? nir_iadd_imm(b, index, offset) : nir_imm_int(b, offset);
+   if (index && index_offset != index)
+      nir_instr_as_alu(index_offset->parent_instr)->no_unsigned_wrap = true;
+
+   if (non_uniform)
+      return nir_iadd(b, load_desc_ptr(b, state, desc_set), index_offset);
+
+   nir_ssa_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set));
+   nir_ssa_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u);
+
+   /* 3 plane formats always have same size and format for plane 1 & 2, so
+    * use the tail from plane 1 so that we can store only the first 16 bytes
+    * of the last plane. */
+   if (desc_type == AC_DESC_PLANE_2) {
+      nir_ssa_def *desc2 = get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex);
+      nir_ssa_def *comp[8];
+      for (unsigned i = 0; i < 8; i++)
+         comp[i] = nir_channel(b, i < 4 ? desc : desc2, i);
+      return nir_vec(b, comp, 8);
+   } else if (desc_type == AC_DESC_SAMPLER && tex->op == nir_texop_tg4) {
+      nir_ssa_def *comp[4];
+      for (unsigned i = 0; i < 4; i++)
+         comp[i] = nir_channel(b, desc, i);
+
+      /* We want to always use the linear filtering truncation behaviour for
+       * nir_texop_tg4, even if the sampler uses nearest/point filtering. */
+      comp[0] = nir_iand_imm(b, comp[0], C_008F30_TRUNC_COORD);
+      return nir_vec(b, comp, 4);
+   }
+
+   return desc;
+}
+
 static void
 apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
 {
@@ -263,6 +381,94 @@ apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_
    }
 }
 
+/* Lower the texture and sampler deref sources of `tex` to descriptor handles.
+ *
+ * The descriptor values are emitted immediately before `tex`; afterwards the
+ * nir_tex_src_texture_deref / nir_tex_src_sampler_deref sources are retyped to
+ * nir_tex_src_texture_handle / nir_tex_src_sampler_handle and point at them.
+ */
+static void
+apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *tex)
+{
+   nir_deref_instr *tex_deref = NULL;
+   nir_deref_instr *samp_deref = NULL;
+   int plane = -1;
+
+   b->cursor = nir_before_instr(&tex->instr);
+
+   /* Gather the sources that decide which descriptors have to be fetched. */
+   for (unsigned s = 0; s < tex->num_srcs; s++) {
+      if (tex->src[s].src_type == nir_tex_src_texture_deref)
+         tex_deref = nir_src_as_deref(tex->src[s].src);
+      else if (tex->src[s].src_type == nir_tex_src_sampler_deref)
+         samp_deref = nir_src_as_deref(tex->src[s].src);
+      else if (tex->src[s].src_type == nir_tex_src_plane)
+         plane = nir_src_as_int(tex->src[s].src);
+   }
+
+   /* Choose the resource descriptor: an explicit plane wins, then buffer
+    * textures, then the FMASK ops; everything else is a plain image.
+    */
+   enum ac_descriptor_type image_type = AC_DESC_IMAGE;
+   if (plane >= 0) {
+      assert(tex->op != nir_texop_txf_ms && tex->op != nir_texop_samples_identical);
+      assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF);
+      image_type = AC_DESC_PLANE_0 + plane;
+   } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+      image_type = AC_DESC_BUFFER;
+   } else if (tex->op == nir_texop_fragment_mask_fetch_amd ||
+              tex->op == nir_texop_samples_identical) {
+      image_type = AC_DESC_FMASK;
+   }
+
+   nir_ssa_def *image =
+      get_sampler_desc(b, state, tex_deref, image_type, tex->texture_non_uniform, tex);
+
+   nir_ssa_def *sampler = NULL;
+   if (samp_deref) {
+      sampler =
+         get_sampler_desc(b, state, samp_deref, AC_DESC_SAMPLER, tex->sampler_non_uniform, tex);
+
+      bool fix_aniso = state->disable_aniso_single_level &&
+                       tex->sampler_dim < GLSL_SAMPLER_DIM_RECT && state->chip_class < GFX8;
+      if (fix_aniso) {
+         /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
+          *
+          * GFX6-GFX7:
+          *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
+          *   filtering manually. The driver sets img7 to a mask clearing
+          *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
+          *     s_and_b32 samp0, samp0, img7
+          *
+          * GFX8:
+          *   The ANISO_OVERRIDE sampler field enables this fix in TA.
+          */
+         /* TODO: This is unnecessary for combined image+sampler.
+          * We can do this when updating the desc set. */
+         /* NOTE(review): this indexes `image`/`sampler` as loaded descriptors;
+          * get_sampler_desc() returns an address instead when asked for a
+          * non-uniform access - confirm that cannot reach this path. */
+         nir_ssa_def *comp[4];
+         for (unsigned c = 0; c < 4; c++)
+            comp[c] = nir_channel(b, sampler, c);
+         comp[0] = nir_iand(b, comp[0], nir_channel(b, image, 7));
+
+         sampler = nir_vec(b, comp, 4);
+      }
+   }
+
+   /* Swap each deref source for the handle computed above. */
+   for (unsigned s = 0; s < tex->num_srcs; s++) {
+      if (tex->src[s].src_type == nir_tex_src_texture_deref) {
+         tex->src[s].src_type = nir_tex_src_texture_handle;
+         nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[s].src, image);
+      } else if (tex->src[s].src_type == nir_tex_src_sampler_deref) {
+         tex->src[s].src_type = nir_tex_src_sampler_handle;
+         nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[s].src, sampler);
+      }
+   }
+}
+
 void
 radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
                                const struct radv_pipeline_layout *layout,
@@ -272,6 +478,7 @@ radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
    apply_layout_state state = {
       .chip_class = device->physical_device->rad_info.chip_class,
       .address32_hi = device->physical_device->rad_info.address32_hi,
+      .disable_aniso_single_level = device->instance->disable_aniso_single_level,
       .args = args,
       .info = info,
       .pipeline_layout = layout,
@@ -291,7 +498,9 @@ radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
        */
       nir_foreach_block_reverse (block, function->impl) {
          nir_foreach_instr_reverse_safe (instr, block) {
-            if (instr->type == nir_instr_type_intrinsic)
+            if (instr->type == nir_instr_type_tex)
+               apply_layout_to_tex(&b, &state, nir_instr_as_tex(instr));
+            else if (instr->type == nir_instr_type_intrinsic)
                apply_layout_to_intrin(&b, &state, nir_instr_as_intrinsic(instr));
          }
       }
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index ea93ed5b7fa..2d27b944547 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -403,118 +403,30 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsign
                       enum ac_descriptor_type desc_type, bool image, bool write, bool bindless)
 {
    struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
-   LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
-   struct radv_descriptor_set_layout *layout =
-      ctx->options->layout->set[descriptor_set].layout;
-   struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
-   unsigned offset = binding->offset;
-   unsigned stride = binding->size;
-   unsigned type_size;
-   LLVMBuilderRef builder = ctx->ac.builder;
-   LLVMTypeRef type;
 
-   assert(base_index < layout->binding_count);
-
-   if (binding->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && desc_type == AC_DESC_FMASK)
+   if (image && desc_type == AC_DESC_FMASK)
       return NULL;
 
-   switch (desc_type) {
-   case AC_DESC_IMAGE:
-      type = ctx->ac.v8i32;
-      type_size = 32;
-      break;
-   case AC_DESC_FMASK:
-      type = ctx->ac.v8i32;
-      offset += 32;
-      type_size = 32;
-      break;
-   case AC_DESC_SAMPLER:
-      type = ctx->ac.v4i32;
-      if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
-         offset += radv_combined_image_descriptor_sampler_offset(binding);
-      }
-
-      type_size = 16;
-      break;
-   case AC_DESC_BUFFER:
-      type = ctx->ac.v4i32;
-      type_size = 16;
-      break;
-   case AC_DESC_PLANE_0:
-   case AC_DESC_PLANE_1:
-   case AC_DESC_PLANE_2:
-      type = ctx->ac.v8i32;
-      type_size = 32;
-      offset += 32 * (desc_type - AC_DESC_PLANE_0);
-      break;
-   default:
-      unreachable("invalid desc_type\n");
-   }
-
-   offset += constant_index * stride;
-
-   if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
-       (!index || binding->immutable_samplers_equal)) {
-      if (binding->immutable_samplers_equal)
-         constant_index = 0;
-
-      const uint32_t *samplers = radv_immutable_samplers(layout, binding);
-
-      LLVMValueRef constants[] = {
-         LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0),
-         LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0),
-         LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0),
-         LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0),
-      };
-      return ac_build_gather_values(&ctx->ac, constants, 4);
-   }
-
-   assert(stride % type_size == 0);
-
-   LLVMValueRef adjusted_index = index;
-   if (!adjusted_index)
-      adjusted_index = ctx->ac.i32_0;
-
-   adjusted_index =
-      LLVMBuildMul(builder, adjusted_index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
-
-   LLVMValueRef val_offset = LLVMConstInt(ctx->ac.i32, offset, 0);
-   list = LLVMBuildGEP(builder, list, &val_offset, 1, "");
-   list = LLVMBuildPointerCast(builder, list, ac_array_in_const32_addr_space(type), "");
-
-   LLVMValueRef descriptor = ac_build_load_to_sgpr(&ctx->ac, list, adjusted_index);
-
    /* 3 plane formats always have same size and format for plane 1 & 2, so
     * use the tail from plane 1 so that we can store only the first 16 bytes
     * of the last plane. */
-   if (desc_type == AC_DESC_PLANE_2) {
-      LLVMValueRef descriptor2 =
-         radv_get_sampler_desc(abi, descriptor_set, base_index, constant_index, index,
-                               AC_DESC_PLANE_1, image, write, bindless);
+   if (desc_type == AC_DESC_PLANE_2 && index && LLVMTypeOf(index) == ctx->ac.i32) {
+      LLVMValueRef plane1_addr =
+         LLVMBuildSub(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 32, false), "");
+      LLVMValueRef descriptor1 = radv_load_rsrc(ctx, plane1_addr, ctx->ac.v8i32);
+      LLVMValueRef descriptor2 = radv_load_rsrc(ctx, index, ctx->ac.v4i32);
 
       LLVMValueRef components[8];
       for (unsigned i = 0; i < 4; ++i)
-         components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i);
+         components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
 
       for (unsigned i = 4; i < 8; ++i)
-         components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
-      descriptor = ac_build_gather_values(&ctx->ac, components, 8);
-   } else if (desc_type == AC_DESC_IMAGE &&
-              ctx->options->has_image_load_dcc_bug &&
-              image && !write) {
-      LLVMValueRef components[8];
-
-      for (unsigned i = 0; i < 8; i++)
-         components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i);
-
-      /* WRITE_COMPRESS_ENABLE must be 0 for all image loads to workaround a hardware bug. */
-      components[6] = LLVMBuildAnd(ctx->ac.builder, components[6],
-                                   LLVMConstInt(ctx->ac.i32, C_00A018_WRITE_COMPRESS_ENABLE, false), "");
-
-      descriptor = ac_build_gather_values(&ctx->ac, components, 8);
+         components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor1, i);
+      return ac_build_gather_values(&ctx->ac, components, 8);
    }
 
-   return descriptor;
+   bool v4 = desc_type == AC_DESC_BUFFER || desc_type == AC_DESC_SAMPLER;
+   return radv_load_rsrc(ctx, index, v4 ? ctx->ac.v4i32 : ctx->ac.v8i32);
 }
 
 static LLVMValueRef
@@ -2223,7 +2135,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
    ctx.abi.clamp_shadow_reference = false;
    ctx.abi.adjust_frag_coord_z = options->adjust_frag_coord_z;
    ctx.abi.robust_buffer_access = options->robust_buffer_access;
-   ctx.abi.disable_aniso_single_level = options->disable_aniso_single_level;
    ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr;
 
    bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index d3532aec404..045f5ba17a3 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1889,7 +1889,6 @@ shader_compile(struct radv_device *device, struct vk_shader_module *module,
    options->enable_mrt_output_nan_fixup =
       module && !is_meta_shader(module->nir) && options->key.ps.enable_mrt_output_nan_fixup;
    options->adjust_frag_coord_z = options->key.adjust_frag_coord_z;
-   options->disable_aniso_single_level = options->key.disable_aniso_single_level;
    options->has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug;
    options->debug.func = radv_compiler_debug;
    options->debug.private_data = &debug_data;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index bb6e1332728..e6b981b0912 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -125,7 +125,6 @@ struct radv_nir_compiler_options {
    bool has_image_load_dcc_bug;
    bool enable_mrt_output_nan_fixup;
    bool wgp_mode;
-   bool disable_aniso_single_level;
    enum radeon_family family;
    enum chip_class chip_class;
    const struct radeon_info *info;
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 62d24451817..0526b33737e 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -520,7 +520,6 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *
    ctx->abi.robust_buffer_access = true;
    ctx->abi.convert_undef_to_zero = true;
    ctx->abi.adjust_frag_coord_z = false;
-   ctx->abi.disable_aniso_single_level = true;
    ctx->abi.load_grid_size_from_user_sgpr = true;
 
    const struct si_shader_info *info = &ctx->shader->selector->info;