r600/sfn: Lower tex,txl,txb and txf to backend

This cleans up the texture code a bit and also gives
more opportunities for optimization in NIR.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18619>
Author: Gert Wollny
Date: 2022-09-05 09:21:38 +02:00
Committed by: Marge Bot
Parent: a6483f0434
Commit: 02bb506c54
3 changed files with 261 additions and 91 deletions
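The pass moves source packing out of the backend emitter: nir_tex_src_backend1 carries the packed coordinate vector (comparator, bias or LOD folded into otherwise free channels) and nir_tex_src_backend2 carries two integer masks saying which channels are actually used and which hold unnormalized coordinates. Below is a minimal, self-contained sketch of how those masks come together for a hypothetical 2D array lookup, following the layout in prepare_coord/get_src_coords further down; the program is illustrative, not part of the driver.

// Illustrative only: mask assembly for one hypothetical tex configuration,
// mirroring LowerTexToBackend::get_src_coords/prepare_coord below.
#include <cstdio>

int main()
{
   const bool is_array = true;   // 2D array lookup
   const bool is_rect  = false;  // GLSL_SAMPLER_DIM_RECT would set this
   const bool has_lod  = true;   // txl: explicit lod lands in channel w

   int used_coord_mask   = 0;
   int unnormalized_mask = 0;

   used_coord_mask |= 1 << 0;              // x
   used_coord_mask |= 1 << 1;              // y
   if (is_array) {
      used_coord_mask   |= 1 << 2;         // layer index goes to z
      unnormalized_mask |= 0x4;            // the layer is never normalized
   }
   if (is_rect)
      unnormalized_mask |= 0x3;            // rect textures: unnormalized x/y
   if (has_lod)
      used_coord_mask |= 1 << 3;           // lod (or bias/comparator) in w

   // This is the shape lower_tex() stores:
   // nir_imm_ivec4(b, used_coord_mask, unnormalized_mask, 0, 0)
   printf("backend2 = ivec4(0x%x, 0x%x, 0, 0)\n",
          used_coord_mask, unnormalized_mask);
   return 0;
}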

src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp

@@ -27,8 +27,11 @@
#include "sfn_instr_tex.h"
#include "sfn_instr_alu.h"
#include "sfn_instr_fetch.h"
#include "sfn_nir.h"
#include "sfn_debug.h"
#include "nir_builder.h"
namespace r600 {
using std::string;
@@ -322,6 +325,10 @@ bool TexInstr::from_nir(nir_tex_instr *tex, Shader& shader)
{
Inputs src(*tex, shader.value_factory());
if (nir_tex_instr_src_index(tex, nir_tex_src_backend1) != -1)
return emit_lowered_tex(tex, src, shader);
if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
switch (tex->op) {
case nir_texop_txs:
@@ -333,13 +340,6 @@ bool TexInstr::from_nir(nir_tex_instr *tex, Shader& shader)
}
} else {
switch (tex->op) {
case nir_texop_tex:
return emit_tex_tex(tex, src, shader);
case nir_texop_txf:
return emit_tex_txf(tex, src, shader);
case nir_texop_txb:
case nir_texop_txl:
return emit_tex_txl_txb(tex, src, shader);
case nir_texop_txs:
return emit_tex_txs(tex, src, {0, 1, 2, 3}, shader);
case nir_texop_lod:
@@ -381,11 +381,12 @@ get_sampler_id(int sampler_id, const nir_variable *deref)
return result;
}
bool TexInstr::emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader)
bool TexInstr::emit_lowered_tex(nir_tex_instr* tex, Inputs& src, Shader& shader)
{
auto& vf = shader.value_factory();
assert(src.backend1);
assert(src.backend2);
auto& vf = shader.value_factory();
sfn_log << SfnLog::instr << "emit '"
<< *reinterpret_cast<nir_instr*>(tex)
<< "' (" << __func__ << ")\n";
@@ -393,99 +394,33 @@ bool TexInstr::emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader)
auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
assert(!sampler.indirect);
auto src_coord = prepare_source(tex, src, shader);
auto dst = vf.dest_vec4(tex->dest, pin_group);
auto irt = new TexInstr(src.opcode, dst, {0,1,2,3}, src_coord, sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS,
src.sampler_offset);
if (tex->is_array)
irt->set_tex_flag(TexInstr::z_unnormalized);
auto params = nir_src_as_const_value(*src.backend2);
int32_t coord_mask = params[0].i32;
int32_t flags = params[1].i32;
irt->set_rect_coordinate_flags(tex);
irt->set_coord_offsets(src.offset);
RegisterVec4::Swizzle src_swizzle = {0};
for (int i = 0; i < 4; ++i)
src_swizzle[i] = (coord_mask & (1 << i)) ? i : 7;
shader.emit_instruction(irt);
return true;
}
bool TexInstr::emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader)
{
auto& vf = shader.value_factory();
auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
assert(!sampler.indirect && "Indirect sampler selection not yet supported");
auto src_coord = prepare_source(tex, src, shader);
auto dst = vf.dest_vec4(tex->dest, pin_group);
auto src_coord = vf.src_vec4(*src.backend1, pin_group, src_swizzle);
auto irt = new TexInstr(src.opcode, dst, {0,1,2,3}, src_coord, sampler.id,
sampler.id + R600_MAX_CONST_BUFFERS,
src.sampler_offset);
if (tex->is_array)
irt->set_tex_flag(TexInstr::z_unnormalized);
for (const auto f : TexFlags) {
if (flags & (1 << f))
irt->set_tex_flag(f);
}
irt->set_rect_coordinate_flags(tex);
irt->set_coord_offsets(src.offset);
shader.emit_instruction(irt);
return true;
}
bool TexInstr::emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
{
auto& vf = shader.value_factory();
int sampler = tex->sampler_index;
auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
swizzle[3] = 3;
if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
swizzle[2] = 1;
swizzle[1] = 7;
}
auto src_coord = vf.temp_vec4(pin_group, swizzle);
for (unsigned i = 0; i < tex->coord_components; i++) {
unsigned k = i;
if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
k = 2;
if (src.offset) {
shader.emit_instruction(new AluInstr(op2_add_int, src_coord[k], src.coord[i],
vf.src(src.offset[i], i),
AluInstr::write));
} else {
shader.emit_instruction(new AluInstr(op1_mov, src_coord[k], src.coord[i],AluInstr::write));
}
}
shader.emit_instruction(new AluInstr(op1_mov, src_coord[3], src.lod, AluInstr::last_write));
auto dst = vf.dest_vec4(tex->dest, pin_group);
auto tex_ir = new TexInstr(src.opcode, dst, {0, 1, 2, 3}, src_coord,
sampler,
sampler + R600_MAX_CONST_BUFFERS,
src.sampler_offset);
if (tex->is_array)
tex_ir->set_tex_flag(z_unnormalized);
tex_ir->set_rect_coordinate_flags(tex);
tex_ir->set_sampler_offset(src.sampler_offset);
shader.emit_instruction(tex_ir);
return true;
}
bool TexInstr::emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
{
auto& vf = shader.value_factory();
@@ -843,7 +778,7 @@ bool TexInstr::emit_tex_tg4(nir_tex_instr* tex, Inputs& src , Shader& shader)
if (src.offset) {
literal_offset = nir_src_as_const_value(*src.offset) != 0;
r600::sfn_log << SfnLog::tex << " really have offsets and they are " <<
(literal_offset ? "literal" : "varying") <<
"\n";
if (!literal_offset) {
@@ -956,6 +891,8 @@ TexInstr::Inputs::Inputs(const nir_tex_instr& instr, ValueFactory& vf):
ms_index(nullptr),
sampler_offset(nullptr),
texture_offset(nullptr),
backend1(nullptr),
backend2(nullptr),
opcode(ld)
{
//sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n";
@@ -1004,6 +941,12 @@ TexInstr::Inputs::Inputs(const nir_tex_instr& instr, ValueFactory& vf):
case nir_tex_src_sampler_offset:
sampler_offset = vf.src(instr.src[i], 0);
break;
case nir_tex_src_backend1:
backend1 = &instr.src[i].src;
break;
case nir_tex_src_backend2:
backend2 = &instr.src[i].src;
break;
case nir_tex_src_plane:
case nir_tex_src_projector:
case nir_tex_src_min_lod:
@@ -1109,5 +1052,218 @@ void TexInstr::set_rect_coordinate_flags(nir_tex_instr* instr)
}
}
class LowerTexToBackend : public NirLowerInstruction {
public:
LowerTexToBackend(amd_gfx_level chip_class);
private:
bool filter(const nir_instr *instr) const override;
nir_ssa_def *lower(nir_instr *instr) override;
nir_ssa_def *lower_tex(nir_tex_instr *tex);
nir_ssa_def *lower_txf(nir_tex_instr *tex);
nir_ssa_def *prepare_coord(nir_tex_instr *tex,
int &unnormalized_mask,
int &used_coord_mask);
int get_src_coords(nir_tex_instr *tex, std::array<nir_ssa_def *, 4> &coord,
bool round_array_index);
nir_ssa_def *prep_src(std::array<nir_ssa_def *, 4> &coord,
int &used_coord_mask);
nir_ssa_def *finalize(nir_tex_instr *tex,
nir_ssa_def *backend1, nir_ssa_def *backend2);
amd_gfx_level m_chip_class;
};
bool r600_nir_lower_tex_to_backend(nir_shader *shader, amd_gfx_level chip_class)
{
return LowerTexToBackend(chip_class).run(shader);
}
LowerTexToBackend::LowerTexToBackend(amd_gfx_level chip_class):
m_chip_class(chip_class)
{
}
bool LowerTexToBackend::filter(const nir_instr *instr) const
{
if (instr->type != nir_instr_type_tex)
return false;
auto tex = nir_instr_as_tex(instr);
if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
return false;
switch (tex->op) {
case nir_texop_tex:
case nir_texop_txb:
case nir_texop_txl:
case nir_texop_txf:
break;
default:
return false;
}
return nir_tex_instr_src_index(tex, nir_tex_src_backend1) == -1;
}
nir_ssa_def *LowerTexToBackend::lower(nir_instr *instr)
{
b->cursor = nir_before_instr(instr);
auto tex = nir_instr_as_tex(instr);
switch (tex->op) {
case nir_texop_tex:
case nir_texop_txb:
case nir_texop_txl:
return lower_tex(tex);
case nir_texop_txf:
return lower_txf(tex);
default:
return nullptr;
}
}
nir_ssa_def *LowerTexToBackend::lower_tex(nir_tex_instr *tex)
{
int unnormalized_mask = 0;
int used_coord_mask = 0;
nir_ssa_def *backend1 = prepare_coord(tex, unnormalized_mask, used_coord_mask);
nir_ssa_def *backend2 =
nir_imm_ivec4(b, used_coord_mask, unnormalized_mask, 0, 0);
return finalize(tex, backend1, backend2);
}
nir_ssa_def *LowerTexToBackend::lower_txf(nir_tex_instr *tex)
{
std::array<nir_ssa_def *, 4> new_coord = {
nullptr,
nullptr,
nullptr,
nullptr
};
get_src_coords(tex, new_coord, false);
int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
new_coord[3] = tex->src[lod_idx].src.ssa;
int used_coord_mask = 0;
nir_ssa_def *backend1 = prep_src(new_coord, used_coord_mask);
nir_ssa_def *backend2 = nir_imm_ivec4(b, used_coord_mask, 0, 0, 0);
return finalize(tex, backend1, backend2);
}
nir_ssa_def *LowerTexToBackend::finalize(nir_tex_instr *tex, nir_ssa_def *backend1,
nir_ssa_def *backend2)
{
nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(backend1));
nir_tex_instr_add_src(tex, nir_tex_src_backend2, nir_src_for_ssa(backend2));
static const nir_tex_src_type cleanup[] = {
nir_tex_src_coord,
nir_tex_src_lod,
nir_tex_src_bias,
nir_tex_src_comparator
};
for (const auto type : cleanup) {
int pos = nir_tex_instr_src_index(tex, type);
if (pos >= 0)
nir_tex_instr_remove_src(tex, pos);
}
return NIR_LOWER_INSTR_PROGRESS;
}
nir_ssa_def *LowerTexToBackend::prep_src(std::array<nir_ssa_def *, 4> &coord,
int &used_coord_mask)
{
for (int i = 0; i < 4; ++i) {
if (coord[i])
used_coord_mask |= 1 << i;
else
coord[i] = nir_ssa_undef(b, 1, 32);
}
return nir_vec(b, coord.data(), 4);
}
nir_ssa_def *LowerTexToBackend::prepare_coord(nir_tex_instr *tex,
int &unnormalized_mask,
int &used_coord_mask)
{
std::array<nir_ssa_def *, 4> new_coord = {
nullptr,
nullptr,
nullptr,
nullptr
};
unnormalized_mask = get_src_coords(tex, new_coord, true);
used_coord_mask = 0;
int comp_idx = tex->is_shadow ?
nir_tex_instr_src_index(tex, nir_tex_src_comparator):
-1;
if (tex->op == nir_texop_txl || tex->op == nir_texop_txb) {
int idx = tex->op == nir_texop_txl ?
nir_tex_instr_src_index(tex, nir_tex_src_lod) :
nir_tex_instr_src_index(tex, nir_tex_src_bias);
assert(idx != -1);
new_coord[3] = tex->src[idx].src.ssa;
if (comp_idx >= 0)
new_coord[2] = tex->src[comp_idx].src.ssa;
} else if (comp_idx >= 0) {
new_coord[3] = tex->src[comp_idx].src.ssa;
}
return prep_src(new_coord, used_coord_mask);
}
int LowerTexToBackend::get_src_coords(nir_tex_instr *tex,
std::array<nir_ssa_def *, 4>& coord,
bool round_array_index)
{
int unnormalized_mask = 0;
auto coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
assert(coord_idx != -1);
auto old_coord = tex->src[coord_idx];
coord = {
nir_channel(b, old_coord.src.ssa, 0),
nullptr,
nullptr,
nullptr
};
if (tex->coord_components > 1) {
if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D)
coord[2] = nir_channel(b, old_coord.src.ssa, 1);
else
coord[1] = nir_channel(b, old_coord.src.ssa, 1);
}
if (tex->coord_components > 2) {
coord[2] = nir_channel(b, old_coord.src.ssa, 2);
}
if (tex->is_array) {
unnormalized_mask |= 0x4;
if (round_array_index)
coord[2] = nir_fround_even(b, coord[2]);
}
if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
unnormalized_mask |= 0x3;
}
return unnormalized_mask;
}
}
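On the consuming side, emit_lowered_tex above reverses this encoding: set bits of the coordinate mask select real channels while unused slots get swizzle index 7, and the flag word from backend2.y is replayed through the TexFlags table. A standalone sketch of that decode follows, reusing the constants visible in the code above; the value 7 as the unused-channel selector is the convention shown in emit_lowered_tex, the rest is illustrative.

// Illustrative decode of backend2, mirroring emit_lowered_tex above.
#include <array>
#include <cstdio>

enum Flags { x_unnormalized, y_unnormalized, z_unnormalized, w_unnormalized };

int main()
{
   const int coord_mask = 0x7; // x, y and layer used; w unused
   const int flags      = 0x4; // array layer (z) is unnormalized

   // Unused channels read as 7, the "undefined" swizzle selector.
   std::array<int, 4> swizzle;
   for (int i = 0; i < 4; ++i)
      swizzle[i] = (coord_mask & (1 << i)) ? i : 7;

   printf("fetch swizzle = {%d, %d, %d, %d}\n",
          swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   // Replay the per-channel flags, as the TexFlags loop does.
   for (int f = x_unnormalized; f <= w_unnormalized; ++f)
      if (flags & (1 << f))
         printf("channel %d is unnormalized\n", f);
   return 0;
}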

src/gallium/drivers/r600/sfn/sfn_instr_tex.h

@@ -76,6 +76,15 @@ public:
num_tex_flag
};
static constexpr Flags TexFlags[] = {
x_unnormalized,
y_unnormalized,
z_unnormalized,
w_unnormalized,
grad_fine,
num_tex_flag
};
struct Inputs {
Inputs(const nir_tex_instr& instr, ValueFactory &vf);
const nir_variable *sampler_deref;
@@ -91,6 +100,8 @@ public:
PVirtualValue ms_index;
PVirtualValue sampler_offset;
PVirtualValue texture_offset;
nir_src *backend1;
nir_src *backend2;
RegisterVec4::Swizzle swizzle_from_ncomps(int comps) const;
@@ -158,17 +169,15 @@ private:
static auto prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader &shader) -> RegisterVec4;
static bool emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_tex_ms_direct(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_tex_ms(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_txs(nir_tex_instr *tex, Inputs& src,
RegisterVec4::Swizzle dest_swz, Shader& shader);
static bool emit_tex_lod(nir_tex_instr* tex, Inputs& src, Shader& shader);
static bool emit_tex_txd(nir_tex_instr *tex, Inputs& src, Shader& shader);
static bool emit_tex_tg4(nir_tex_instr* instr, Inputs& src , Shader& shader);
static bool emit_tex_texture_samples(nir_tex_instr* instr, Inputs& src, Shader& shader);
static bool emit_lowered_tex(nir_tex_instr* instr, Inputs& src, Shader& shader);
void set_coord_offsets(nir_src *offset);
void set_rect_coordinate_flags(nir_tex_instr* instr);
@@ -188,6 +197,8 @@ private:
std::list<TexInstr *> m_prepare_instr;
};
bool r600_nir_lower_tex_to_backend(nir_shader *shader, amd_gfx_level chip_class);
}
#endif // INSTR_TEX_H

src/gallium/drivers/r600/sfn/sfn_nir.cpp

@@ -40,6 +40,7 @@
#include "sfn_nir_lower_fs_out_to_vector.h"
#include "sfn_nir_lower_alu.h"
#include "sfn_nir_lower_tex.h"
#include "sfn_instr_tex.h"
#include "sfn_optimizer.h"
#include "sfn_ra.h"
#include "sfn_scheduler.h"
@@ -754,6 +755,8 @@ int r600_shader_from_nir(struct r600_context *rctx,
NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
NIR_PASS_V(sh, nir_lower_phis_to_scalar, false);
NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
NIR_PASS_V(sh, r600::r600_nir_lower_tex_to_backend, rctx->b.gfx_level);
NIR_PASS_V(sh, r600::r600_nir_split_64bit_io);
NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);
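r600_nir_lower_tex_to_backend slots into this chain like any other NIR lowering; under the hood the NirLowerInstruction helper wraps NIR's generic filter-and-lower walk. A hedged sketch of that pattern follows, using the C entry point nir_shader_lower_instructions; the callback bodies are placeholders, not the pass's real logic.

/* Sketch of the filter/lower pattern that NirLowerInstruction wraps;
 * nir_shader_lower_instructions is the real NIR entry point, the
 * callback bodies here are placeholders. */
#include "nir.h"
#include "nir_builder.h"

static bool
filter_tex(const nir_instr *instr, const void *data)
{
   if (instr->type != nir_instr_type_tex)
      return false;
   /* Skip instructions that already carry the backend sources. */
   return nir_tex_instr_src_index(nir_instr_as_tex(instr),
                                  nir_tex_src_backend1) == -1;
}

static nir_ssa_def *
lower_tex(nir_builder *b, nir_instr *instr, void *data)
{
   b->cursor = nir_before_instr(instr);
   /* ... add backend1/backend2, drop the now-redundant sources ... */
   return NIR_LOWER_INSTR_PROGRESS; /* rewrote in place, no replacement def */
}

bool
run_lower_tex(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader, filter_tex, lower_tex, NULL);
}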