radeonsi: move most "info" fields from si_shader_selector into si_shader_info

It's where they should be, and future commits might require this.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14414>
2022-01-04 13:34:16 -05:00
parent b57a163b7d
commit 8de5b11b29
13 changed files with 251 additions and 249 deletions
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -934,7 +934,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
   LLVMValueRef position[4] = {};
   unsigned pos_index = 0;
   unsigned clip_plane_enable = SI_NGG_CULL_GET_CLIP_PLANE_ENABLE(shader->key.ge.opt.ngg_culling);
-   unsigned clipdist_enable = (sel->clipdist_mask & clip_plane_enable) | sel->culldist_mask;
+   unsigned clipdist_enable = (sel->info.clipdist_mask & clip_plane_enable) | sel->info.culldist_mask;
   bool has_clipdist_mask = false;

   for (unsigned i = 0; i < info->num_outputs; i++) {
@@ -999,7 +999,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
      }
   }

-   if (clip_plane_enable && !sel->clipdist_mask) {
+   if (clip_plane_enable && !sel->info.clipdist_mask) {
      /* When clip planes are enabled and there are no clip distance outputs,
       * we should use user clip planes and cull against the position.
       */
@@ -1337,7 +1337,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
      ret = si_insert_input_ptr(ctx, ret, ctx->args.start_instance, 8 + SI_SGPR_START_INSTANCE);
      ret = si_insert_input_ptr(ctx, ret, ctx->args.vertex_buffers, 8 + GFX9_GS_NUM_USER_SGPR);

-      for (unsigned i = 0; i < shader->selector->num_vbos_in_user_sgprs; i++) {
+      for (unsigned i = 0; i < shader->selector->info.num_vbos_in_user_sgprs; i++) {
         ret = si_insert_input_v4i32(ctx, ret, ctx->vb_descriptors[i],
                                     8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST + i * 4);
      }
@@ -1349,8 +1349,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)

   unsigned vgpr;
   if (ctx->stage == MESA_SHADER_VERTEX) {
-      if (shader->selector->num_vbos_in_user_sgprs) {
-         vgpr = 8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->num_vbos_in_user_sgprs * 4;
+      if (shader->selector->info.num_vbos_in_user_sgprs) {
+         vgpr = 8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->info.num_vbos_in_user_sgprs * 4;
      } else {
         vgpr = 8 + GFX9_GS_NUM_USER_SGPR + 1;
      }
@@ -1770,7 +1770,7 @@ void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LL
         LLVMBuildStore(builder, out_val, ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx));
      }
   }
-   assert(out_idx * 4 == sel->gsvs_vertex_size);
+   assert(out_idx * 4 == info->gsvs_vertex_size);

   /* Determine and store whether this vertex completed a primitive. */
   const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
@@ -2227,8 +2227,8 @@ retry_select_mode:
         max_out_verts_per_gsprim = gs_sel->info.base.gs.vertices_out;
      }

-      esvert_lds_size = es_sel->esgs_itemsize / 4;
-      gsprim_lds_size = (gs_sel->gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
+      esvert_lds_size = es_sel->info.esgs_itemsize / 4;
+      gsprim_lds_size = (gs_sel->info.gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;

      if (gsprim_lds_size > target_lds_size && !force_multi_cycling) {
         if (gs_sel->tess_turns_off_ngg || es_sel->info.stage != MESA_SHADER_TESS_EVAL) {
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -125,7 +125,7 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
      si_init_compiler(sscreen, compiler);

   assert(program->ir_type == PIPE_SHADER_IR_NIR);
-   si_nir_scan_shader(sel->nir, &sel->info);
+   si_nir_scan_shader(sscreen, sel->nir, &sel->info);

   si_get_active_slot_masks(&sel->info, &sel->active_const_and_shader_buffers,
                            &sel->active_samplers_and_images);
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -122,7 +122,6 @@ extern "C" {

 #define SI_MAX_BORDER_COLORS              4096
 #define SI_MAX_VIEWPORTS                  16
-#define SI_USER_CLIP_PLANE_MASK           0x3F
 #define SI_MAP_BUFFER_ALIGNMENT           64
 /* We only support the minimum allowed value (512), so that we can pack a 3D block size
 * in 1 SGPR. */
@@ -1882,8 +1881,8 @@ static inline unsigned si_get_total_colormask(struct si_context *sctx)
      sctx->framebuffer.colorbuf_enabled_4bit & sctx->queued.named.blend->cb_target_mask;

   if (!ps->info.color0_writes_all_cbufs)
-      colormask &= ps->colors_written_4bit;
-   else if (!ps->colors_written_4bit)
+      colormask &= ps->info.colors_written_4bit;
+   else if (!ps->info.colors_written_4bit)
      colormask = 0; /* color0 writes all cbufs, but it's not written */

   return colormask;
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -281,7 +281,7 @@ static void declare_vb_descriptor_input_sgprs(struct si_shader_context *ctx)
 {
   ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->args.vertex_buffers);

-   unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs;
+   unsigned num_vbos_in_user_sgprs = ctx->shader->selector->info.num_vbos_in_user_sgprs;
   if (num_vbos_in_user_sgprs) {
      unsigned user_sgprs = ctx->args.num_sgprs_used;

@@ -496,14 +496,14 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)

         /* VS outputs passed via VGPRs to TCS. */
         if (shader->key.ge.opt.same_patch_vertices) {
-            unsigned num_outputs = util_last_bit64(shader->selector->outputs_written);
+            unsigned num_outputs = util_last_bit64(shader->selector->info.outputs_written);
            for (i = 0; i < num_outputs * 4; i++)
               ac_add_return(&ctx->args, AC_ARG_VGPR);
         }
      } else {
         /* TCS inputs are passed via VGPRs from VS. */
         if (shader->key.ge.opt.same_patch_vertices) {
-            unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->outputs_written);
+            unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->info.outputs_written);
            for (i = 0; i < num_inputs * 4; i++)
               ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
         }
@@ -592,10 +592,10 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
             */
            num_user_sgprs = GFX9_GS_NUM_USER_SGPR + 1;

-            if (shader->selector->num_vbos_in_user_sgprs) {
+            if (shader->selector->info.num_vbos_in_user_sgprs) {
               assert(num_user_sgprs <= SI_SGPR_VS_VB_DESCRIPTOR_FIRST);
               num_user_sgprs =
-                  SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->num_vbos_in_user_sgprs * 4;
+                  SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->info.num_vbos_in_user_sgprs * 4;
            }
         } else {
            num_user_sgprs = GFX9_GS_NUM_USER_SGPR;
@@ -1319,7 +1319,7 @@ bool si_vs_needs_prolog(const struct si_shader_selector *sel,

   /* VGPR initialization fixup for Vega10 and Raven is always done in the
    * VS prolog. */
-   return sel->vs_needs_prolog || prolog_key->ls_vgpr_fix ||
+   return sel->info.vs_needs_prolog || prolog_key->ls_vgpr_fix ||
          /* The 2nd VS prolog loads input VGPRs from LDS */
          (key->ge.opt.ngg_culling && !ngg_cull_shader && !is_gs);
 }
@@ -1575,7 +1575,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
 void si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir)
 {
   struct si_shader_info info;
-   si_nir_scan_shader(nir, &info);
+   si_nir_scan_shader(shader->selector->screen, nir, &info);

   shader->info.uses_vmem_load_other |= info.uses_vmem_load_other;
   shader->info.uses_vmem_sampler_or_bvh |= info.uses_vmem_sampler_or_bvh;
@@ -1915,7 +1915,7 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
      shader->info.uses_vmem_load_other = true;

   if (info->colors_read) {
-      ubyte *color = shader->selector->color_attr_index;
+      ubyte *color = shader->selector->info.color_attr_index;

      if (shader->key.ps.part.prolog.color_two_side) {
         /* BCOLORs are stored after the last input. */
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -156,6 +156,7 @@ struct si_context;

 #define SI_MAX_ATTRIBS    16
 #define SI_MAX_VS_OUTPUTS 40
+#define SI_USER_CLIP_PLANE_MASK  0x3F

 #define SI_NGG_PRIM_EDGE_FLAG_BITS ((1 << 9) | (1 << 19) | (1 << 29))

@@ -362,18 +363,43 @@ struct si_shader_info {
   ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS];
   ubyte output_type[PIPE_MAX_SHADER_OUTPUTS]; /* enum nir_alu_type */

-   ubyte color_interpolate[2];
-   ubyte color_interpolate_loc[2];
-
-   int constbuf0_num_slots;
+   ubyte num_vs_inputs;
+   ubyte num_vbos_in_user_sgprs;
   ubyte num_stream_output_components[4];
   uint16_t enabled_streamout_buffer_mask;

-   uint num_memory_stores;
+   uint64_t inputs_read; /* "get_unique_index" bits */
+   uint64_t tcs_vgpr_only_inputs; /* TCS inputs that are only in VGPRs, not LDS. */

+   uint64_t outputs_written_before_ps; /* "get_unique_index" bits */
+   uint64_t outputs_written;           /* "get_unique_index" bits */
+   uint32_t patch_outputs_written;     /* "get_unique_index_patch" bits */
+
+   ubyte clipdist_mask;
+   ubyte culldist_mask;
+
+   uint16_t lshs_vertex_stride;
+   uint16_t esgs_itemsize; /* vertex stride */
+   uint16_t gsvs_vertex_size;
+   ubyte gs_input_verts_per_prim;
+   unsigned max_gsvs_emit_size;
+
+   /* PS parameters */
+   unsigned db_shader_control;
+   /* Set 0xf or 0x0 (4 bits) per each written output.
+    * ANDed with spi_shader_col_format.
+    */
+   unsigned colors_written_4bit;
+
+   int constbuf0_num_slots;
+   uint num_memory_stores;
+   ubyte color_attr_index[2];
+   ubyte color_interpolate[2];
+   ubyte color_interpolate_loc[2];
   ubyte colors_read; /**< which color components are read by the FS */
   ubyte colors_written;
   uint16_t output_color_types; /**< Each bit pair is enum si_color_output_type */
+   bool vs_needs_prolog;
   bool color0_writes_all_cbufs; /**< gl_FragColor */
   bool reads_samplemask;   /**< does fragment shader read sample mask? */
   bool reads_tess_factors; /**< If TES reads TESSINNER or TESSOUTER */
@@ -465,44 +491,17 @@ struct si_shader_selector {
   enum pipe_shader_type pipe_shader_type;
   ubyte const_and_shader_buf_descriptors_index;
   ubyte sampler_and_images_descriptors_index;
-   bool vs_needs_prolog;
   ubyte cs_shaderbufs_sgpr_index;
   ubyte cs_num_shaderbufs_in_user_sgprs;
   ubyte cs_images_sgpr_index;
   ubyte cs_images_num_sgprs;
   ubyte cs_num_images_in_user_sgprs;
-   ubyte num_vs_inputs;
-   ubyte num_vbos_in_user_sgprs;
   unsigned ngg_cull_vert_threshold; /* UINT32_MAX = disabled */
-   ubyte clipdist_mask;
-   ubyte culldist_mask;
   enum pipe_prim_type rast_prim;

-   /* ES parameters. */
-   uint16_t esgs_itemsize; /* vertex stride */
-   uint16_t lshs_vertex_stride;
-
   /* GS parameters. */
-   uint16_t gsvs_vertex_size;
-   ubyte gs_input_verts_per_prim;
-   unsigned max_gsvs_emit_size;
   bool tess_turns_off_ngg;

-   /* PS parameters. */
-   ubyte color_attr_index[2];
-   unsigned db_shader_control;
-   /* Set 0xf or 0x0 (4 bits) per each written output.
-    * ANDed with spi_shader_col_format.
-    */
-   unsigned colors_written_4bit;
-
-   uint64_t outputs_written_before_ps; /* "get_unique_index" bits */
-   uint64_t outputs_written;           /* "get_unique_index" bits */
-   uint32_t patch_outputs_written;     /* "get_unique_index_patch" bits */
-
-   uint64_t inputs_read; /* "get_unique_index" bits */
-   uint64_t tcs_vgpr_only_inputs; /* TCS inputs that are only in VGPRs, not LDS. */
-
   /* bitmasks of used descriptor slots */
   uint64_t active_const_and_shader_buffers;
   uint64_t active_samplers_and_images;
@@ -952,7 +951,8 @@ const char *si_get_shader_name(const struct si_shader *shader);
 void si_shader_binary_clean(struct si_shader_binary *binary);

 /* si_shader_info.c */
-void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *info);
+void si_nir_scan_shader(struct si_screen *sscreen,  const struct nir_shader *nir,
+                        struct si_shader_info *info);

 /* si_shader_llvm_gs.c */
 struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
--- a/src/gallium/drivers/radeonsi/si_shader_info.c
+++ b/src/gallium/drivers/radeonsi/si_shader_info.c
@@ -22,8 +22,10 @@
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

-#include "si_shader.h"
+#include "si_pipe.h"
 #include "util/mesa-sha1.h"
+#include "util/u_prim.h"
+#include "sid.h"


 struct si_shader_profile {
@@ -580,7 +582,8 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
   }
 }

-void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *info)
+void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
+                        struct si_shader_info *info)
 {
   memset(info, 0, sizeof(*info));
   info->base = nir->info;
@@ -729,4 +732,155 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
      info->output_readmask[i] &= info->output_usagemask[i];

   info->has_divergent_loop = nir_has_divergent_loop((nir_shader*)nir);
+
+   if (info->stage == MESA_SHADER_VERTEX ||
+       info->stage == MESA_SHADER_TESS_CTRL ||
+       info->stage == MESA_SHADER_TESS_EVAL ||
+       info->stage == MESA_SHADER_GEOMETRY) {
+      if (info->stage == MESA_SHADER_TESS_CTRL) {
+         /* Always reserve space for these. */
+         info->patch_outputs_written |=
+            (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER)) |
+            (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER));
+      }
+      for (unsigned i = 0; i < info->num_outputs; i++) {
+         unsigned semantic = info->output_semantic[i];
+
+         if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
+             semantic == VARYING_SLOT_TESS_LEVEL_OUTER ||
+             (semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX)) {
+            info->patch_outputs_written |= 1ull << si_shader_io_get_unique_index_patch(semantic);
+         } else if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
+                    semantic != VARYING_SLOT_EDGE) {
+            info->outputs_written |= 1ull << si_shader_io_get_unique_index(semantic, false);
+
+            /* Ignore outputs that are not passed from VS to PS. */
+            if (semantic != VARYING_SLOT_POS &&
+                semantic != VARYING_SLOT_PSIZ &&
+                semantic != VARYING_SLOT_CLIP_VERTEX) {
+               info->outputs_written_before_ps |= 1ull
+                                                  << si_shader_io_get_unique_index(semantic, true);
+            }
+         }
+      }
+   }
+
+   if (nir->info.stage == MESA_SHADER_VERTEX) {
+      info->num_vs_inputs =
+         info->stage == MESA_SHADER_VERTEX && !info->base.vs.blit_sgprs_amd ? info->num_inputs : 0;
+      unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.chip_class);
+      info->num_vbos_in_user_sgprs = MIN2(info->num_vs_inputs, num_vbos_in_sgprs);
+
+      /* The prolog is a no-op if there are no inputs. */
+      info->vs_needs_prolog = info->num_inputs && !info->base.vs.blit_sgprs_amd;
+   }
+
+   if (nir->info.stage == MESA_SHADER_VERTEX ||
+       nir->info.stage == MESA_SHADER_TESS_CTRL ||
+       nir->info.stage == MESA_SHADER_TESS_EVAL) {
+      info->esgs_itemsize = util_last_bit64(info->outputs_written) * 16;
+      info->lshs_vertex_stride = info->esgs_itemsize;
+
+      /* Add 1 dword to reduce LDS bank conflicts, so that each vertex
+       * will start on a different bank (except for the maximum 32*16).
+       */
+      if (info->lshs_vertex_stride < 32 * 16)
+         info->lshs_vertex_stride += 4;
+
+      /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
+       * conflicts, i.e. each vertex will start at a different bank.
+       */
+      if (sscreen->info.chip_class >= GFX9)
+         info->esgs_itemsize += 4;
+
+      assert(((info->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0);
+
+      info->tcs_vgpr_only_inputs = ~info->base.tess.tcs_cross_invocation_inputs_read &
+                                   ~info->base.inputs_read_indirectly &
+                                   info->base.inputs_read;
+   }
+
+   if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+      info->gsvs_vertex_size = info->num_outputs * 16;
+      info->max_gsvs_emit_size = info->gsvs_vertex_size * info->base.gs.vertices_out;
+      info->gs_input_verts_per_prim =
+         u_vertices_per_prim((enum pipe_prim_type)info->base.gs.input_primitive);
+   }
+
+   info->clipdist_mask = info->writes_clipvertex ? SI_USER_CLIP_PLANE_MASK :
+                         u_bit_consecutive(0, info->base.clip_distance_array_size);
+   info->culldist_mask = u_bit_consecutive(0, info->base.cull_distance_array_size) <<
+                         info->base.clip_distance_array_size;
+
+   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      for (unsigned i = 0; i < info->num_inputs; i++) {
+         unsigned semantic = info->input[i].semantic;
+
+         if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
+             semantic != VARYING_SLOT_PNTC) {
+            info->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic, true);
+         }
+      }
+
+      for (unsigned i = 0; i < 8; i++)
+         if (info->colors_written & (1 << i))
+            info->colors_written_4bit |= 0xf << (4 * i);
+
+      for (unsigned i = 0; i < info->num_inputs; i++) {
+         if (info->input[i].semantic == VARYING_SLOT_COL0)
+            info->color_attr_index[0] = i;
+         else if (info->input[i].semantic == VARYING_SLOT_COL1)
+            info->color_attr_index[1] = i;
+      }
+
+      /* DB_SHADER_CONTROL */
+      info->db_shader_control = S_02880C_Z_EXPORT_ENABLE(info->writes_z) |
+                                S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->writes_stencil) |
+                                S_02880C_MASK_EXPORT_ENABLE(info->writes_samplemask) |
+                                S_02880C_KILL_ENABLE(info->base.fs.uses_discard);
+
+      switch (info->base.fs.depth_layout) {
+      case FRAG_DEPTH_LAYOUT_GREATER:
+         info->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
+         break;
+      case FRAG_DEPTH_LAYOUT_LESS:
+         info->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
+         break;
+      default:;
+      }
+
+      /* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as follows:
+       *
+       *   | early Z/S | writes_mem | allow_ReZ? |      Z_ORDER       | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP
+       * --|-----------|------------|------------|--------------------|-------------------|-------------
+       * 1a|   false   |   false    |   true     | EarlyZ_Then_ReZ    |         0         |     0
+       * 1b|   false   |   false    |   false    | EarlyZ_Then_LateZ  |         0         |     0
+       * 2 |   false   |   true     |   n/a      |       LateZ        |         1         |     0
+       * 3 |   true    |   false    |   n/a      | EarlyZ_Then_LateZ  |         0         |     0
+       * 4 |   true    |   true     |   n/a      | EarlyZ_Then_LateZ  |         0         |     1
+       *
+       * In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register.
+       * In case 2, NOOP_CULL is a don't-care field. In cases 2, 3 and 4, ReZ doesn't make sense.
+       *
+       * Don't use ReZ without profiling !!!
+       *
+       * ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex
+       * shaders.
+       */
+      if (info->base.fs.early_fragment_tests) {
+         /* Cases 3, 4. */
+         info->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) |
+                                    S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
+                                    S_02880C_EXEC_ON_NOOP(info->base.writes_memory);
+      } else if (info->base.writes_memory) {
+         /* Case 2. */
+         info->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) | S_02880C_EXEC_ON_HIER_FAIL(1);
+      } else {
+         /* Case 1. */
+         info->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
+      }
+
+      if (info->base.fs.post_depth_coverage)
+         info->db_shader_control |= S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(1);
+   }
 }
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -1027,7 +1027,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
         /* We need the barrier only if TCS inputs are read from LDS. */
         if (!shader->key.ge.opt.same_patch_vertices ||
             shader->selector->info.base.inputs_read &
-             ~shader->selector->tcs_vgpr_only_inputs)
+             ~shader->selector->info.tcs_vgpr_only_inputs)
            ac_build_s_barrier(&ctx->ac);
      } else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
         /* gfx10_ngg_gs_emit_prologue inserts the barrier for NGG. */
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -140,7 +140,7 @@ void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi)
   int i;

   if (ctx->screen->info.chip_class >= GFX9 && info->num_outputs) {
-      unsigned itemsize_dw = es->selector->esgs_itemsize / 4;
+      unsigned itemsize_dw = es->selector->info.esgs_itemsize / 4;
      LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
      LLVMValueRef wave_idx = si_unpack_param(ctx, ctx->args.merged_wave_info, 24, 4);
      vertex_idx =
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@@ -74,7 +74,7 @@ static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *
   if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
      return util_last_bit64(ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) * 4;

-   return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
+   return util_last_bit64(ctx->shader->selector->info.outputs_written) * 4;
 }

 static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
@@ -92,7 +92,7 @@ static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
   const struct si_shader_info *info = &ctx->shader->selector->info;
   unsigned tcs_out_vertices = info->base.tess.tcs_vertices_out;
   unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
-   unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written);
+   unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->info.patch_outputs_written);
   unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride + num_patch_outputs * 4;
   return LLVMConstInt(ctx->ac.i32, patch_dw_stride, 0);
 }
@@ -155,12 +155,12 @@ static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)

   switch (ctx->stage) {
   case MESA_SHADER_VERTEX:
-      stride = ctx->shader->selector->lshs_vertex_stride / 4;
+      stride = ctx->shader->selector->info.lshs_vertex_stride / 4;
      return LLVMConstInt(ctx->ac.i32, stride, 0);

   case MESA_SHADER_TESS_CTRL:
      if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) {
-         stride = ctx->shader->key.ge.part.tcs.ls->lshs_vertex_stride / 4;
+         stride = ctx->shader->key.ge.part.tcs.ls->info.lshs_vertex_stride / 4;
         return LLVMConstInt(ctx->ac.i32, stride, 0);
      }
      return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);
@@ -980,7 +980,7 @@ void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi)
         LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");

         if (!shader->key.ge.opt.same_patch_vertices ||
-             !(ctx->next_shader_sel->tcs_vgpr_only_inputs & (1ull << semantic)))
+             !(ctx->next_shader_sel->info.tcs_vgpr_only_inputs & (1ull << semantic)))
            lshs_lds_store(ctx, chan, dw_addr, value);

         if (shader->key.ge.opt.same_patch_vertices) {
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
@@ -111,7 +111,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
   unsigned bit_size = info->input[input_index].fp16_lo_hi_valid & 0x1 ? 16 : 32;
   LLVMTypeRef int_type = bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32;
   LLVMTypeRef float_type = bit_size == 16 ? ctx->ac.f16 : ctx->ac.f32;
-   unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs;
+   unsigned num_vbos_in_user_sgprs = ctx->shader->selector->info.num_vbos_in_user_sgprs;
   union si_vs_fix_fetch fix_fetch;
   LLVMValueRef vb_desc;
   LLVMValueRef vertex_index;
@@ -391,7 +391,7 @@ void si_llvm_clipvertex_to_clipdist(struct si_shader_context *ctx,
   LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings);
   LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0);
   LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);
-   unsigned clipdist_mask = ctx->shader->selector->clipdist_mask &
+   unsigned clipdist_mask = ctx->shader->selector->info.clipdist_mask &
                            ~ctx->shader->key.ge.opt.kill_clip_distances;

   for (reg_index = 0; reg_index < 2; reg_index++) {
@@ -569,9 +569,9 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
   LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL,
                viewport_index_value = NULL;
   unsigned pos_idx, index;
-   unsigned clipdist_mask = (shader->selector->clipdist_mask &
+   unsigned clipdist_mask = (shader->selector->info.clipdist_mask &
                             ~shader->key.ge.opt.kill_clip_distances) |
-                            shader->selector->culldist_mask;
+                            shader->selector->info.culldist_mask;
   int i;

   si_vertex_color_clamping(ctx, outputs, noutput);
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -834,9 +834,9 @@ static void si_emit_clip_regs(struct si_context *sctx)
   struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
   bool window_space = info->stage == MESA_SHADER_VERTEX ?
                          info->base.vs.window_space_position : 0;
-   unsigned clipdist_mask = vs_sel->clipdist_mask;
+   unsigned clipdist_mask = vs_sel->info.clipdist_mask;
   unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SI_USER_CLIP_PLANE_MASK;
-   unsigned culldist_mask = vs_sel->culldist_mask;
+   unsigned culldist_mask = vs_sel->info.culldist_mask;

   /* Clip distances on points have no effect, so need to be implemented
    * as cull distances. This applies for the clipvertex case as well.
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -559,13 +559,13 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa

   /* This calculates how shader inputs and outputs among VS, TCS, and TES
    * are laid out in LDS. */
-   unsigned num_tcs_inputs = util_last_bit64(ls->outputs_written);
+   unsigned num_tcs_inputs = util_last_bit64(ls->info.outputs_written);
   unsigned num_tcs_output_cp, num_tcs_outputs, num_tcs_patch_outputs;

   if (sctx->shader.tcs.cso) {
-      num_tcs_outputs = util_last_bit64(tcs->outputs_written);
+      num_tcs_outputs = util_last_bit64(tcs->info.outputs_written);
      num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out;
-      num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
+      num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);
   } else {
      /* No TCS. Route varyings from LS to TES. */
      num_tcs_outputs = num_tcs_inputs;
@@ -573,13 +573,13 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
      num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
   }

-   unsigned input_vertex_size = ls->lshs_vertex_stride;
+   unsigned input_vertex_size = ls->info.lshs_vertex_stride;
   unsigned output_vertex_size = num_tcs_outputs * 16;
   unsigned input_patch_size;

   /* Allocate LDS for TCS inputs only if it's used. */
   if (!ls_current->key.ge.opt.same_patch_vertices ||
-       tcs->info.base.inputs_read & ~tcs->tcs_vgpr_only_inputs)
+       tcs->info.base.inputs_read & ~tcs->info.tcs_vgpr_only_inputs)
      input_patch_size = num_tcs_input_cp * input_vertex_size;
   else
      input_patch_size = 0;
@@ -2112,8 +2112,8 @@ static void si_draw(struct pipe_context *ctx,
   struct si_shader_selector *vs = sctx->shader.vs.cso;
   struct si_vertex_state *vstate = (struct si_vertex_state *)state;
   if (unlikely(!vs ||
-                (!IS_DRAW_VERTEX_STATE && sctx->num_vertex_elements < vs->num_vs_inputs) ||
-                (IS_DRAW_VERTEX_STATE && vstate->velems.count < vs->num_vs_inputs) ||
+                (!IS_DRAW_VERTEX_STATE && sctx->num_vertex_elements < vs->info.num_vs_inputs) ||
+                (IS_DRAW_VERTEX_STATE && vstate->velems.count < vs->info.num_vs_inputs) ||
                !sctx->shader.ps.cso || (HAS_TESS != (prim == PIPE_PRIM_PATCHES)))) {
      assert(0);
      return;
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -614,7 +614,7 @@ static unsigned si_get_num_vs_user_sgprs(struct si_shader *shader,
 {
   struct si_shader_selector *vs =
      shader->previous_stage_sel ? shader->previous_stage_sel : shader->selector;
-   unsigned num_vbos_in_user_sgprs = vs->num_vbos_in_user_sgprs;
+   unsigned num_vbos_in_user_sgprs = vs->info.num_vbos_in_user_sgprs;

   /* 1 SGPR is reserved for the vertex buffer pointer. */
   assert(num_always_on_user_sgprs <= SI_SGPR_VS_VB_DESCRIPTOR_FIRST - 1);
@@ -744,7 +744,7 @@ static void si_emit_shader_es(struct si_context *sctx)
   radeon_begin(&sctx->gfx_cs);
   radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
                              SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
-                              shader->selector->esgs_itemsize / 4);
+                              shader->selector->info.esgs_itemsize / 4);

   if (shader->selector->info.stage == MESA_SHADER_TESS_EVAL)
      radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM,
@@ -815,7 +815,7 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *
   /* We can't allow using the whole LDS, because GS waves compete with
    * other shader stages for LDS space. */
   const unsigned max_lds_size = 8 * 1024;
-   const unsigned esgs_itemsize = es->esgs_itemsize / 4;
+   const unsigned esgs_itemsize = es->info.esgs_itemsize / 4;
   unsigned esgs_lds_size;

   /* All these are per subgroup: */
@@ -842,7 +842,7 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *
   /* If the primitive has adjacency, halve the number of vertices
    * that will be reused in multiple primitives.
    */
-   min_es_verts = gs->gs_input_verts_per_prim / (uses_adjacency ? 2 : 1);
+   min_es_verts = gs->info.gs_input_verts_per_prim / (uses_adjacency ? 2 : 1);

   gs_prims = MIN2(ideal_gs_prims, max_gs_prims);
   worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);
@@ -877,7 +877,7 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *
   /* Vertices for adjacency primitives are not always reused, so restore
    * it for ES_VERTS_PER_SUBGRP.
    */
-   min_es_verts = gs->gs_input_verts_per_prim;
+   min_es_verts = gs->info.gs_input_verts_per_prim;

   /* For normal primitives, the VGT only checks if they are past the ES
    * verts per subgroup after allocating a full GS primitive and if they
@@ -1105,7 +1105,7 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
         S_028A44_GS_INST_PRIMS_IN_SUBGRP(shader->gs_info.gs_inst_prims_in_subgroup);
      shader->ctx_reg.gs.vgt_gs_max_prims_per_subgroup =
         S_028A94_MAX_PRIMS_PER_SUBGROUP(shader->gs_info.max_prims_per_subgroup);
-      shader->ctx_reg.gs.vgt_esgs_ring_itemsize = shader->key.ge.part.gs.es->esgs_itemsize / 4;
+      shader->ctx_reg.gs.vgt_esgs_ring_itemsize = shader->key.ge.part.gs.es->info.esgs_itemsize / 4;

      if (es_stage == MESA_SHADER_TESS_EVAL)
         si_set_tesseval_regs(sscreen, shader->key.ge.part.gs.es, shader);
@@ -1286,8 +1286,8 @@ static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel,
                                   const struct si_shader *shader, bool ngg)
 {
   /* Clip distances can be killed, but cull distances can't. */
-   unsigned clipcull_mask = (sel->clipdist_mask & ~shader->key.ge.opt.kill_clip_distances) |
-                            sel->culldist_mask;
+   unsigned clipcull_mask = (sel->info.clipdist_mask & ~shader->key.ge.opt.kill_clip_distances) |
+                            sel->info.culldist_mask;
   bool writes_psize = sel->info.writes_psize && !shader->key.ge.opt.kill_pointsize;
   bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) ||
                       sel->screen->options.vrs2x2 ||
@@ -1427,7 +1427,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
                                        gs_sel->info.writes_primid);

   if (gs_stage == MESA_SHADER_GEOMETRY) {
-      shader->ctx_reg.ngg.vgt_esgs_ring_itemsize = es_sel->esgs_itemsize / 4;
+      shader->ctx_reg.ngg.vgt_esgs_ring_itemsize = es_sel->info.esgs_itemsize / 4;
      shader->ctx_reg.ngg.vgt_gs_max_vert_out = gs_sel->info.base.gs.vertices_out;
   } else {
      shader->ctx_reg.ngg.vgt_esgs_ring_itemsize = 1;
@@ -2071,16 +2071,16 @@ void si_update_ps_inputs_read_or_disabled(struct si_context *sctx)
                    (!ps_colormask && !ps_modifies_zs && !ps->info.base.writes_memory);
   }

-   sctx->ps_inputs_read_or_disabled = ps_disabled ? 0 : ps->inputs_read;
+   sctx->ps_inputs_read_or_disabled = ps_disabled ? 0 : ps->info.inputs_read;
 }

 static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs,
                                  union si_shader_key *key)
 {
-   key->ge.opt.kill_clip_distances = vs->clipdist_mask & ~sctx->queued.named.rasterizer->clip_plane_enable;
+   key->ge.opt.kill_clip_distances = vs->info.clipdist_mask & ~sctx->queued.named.rasterizer->clip_plane_enable;

   /* Find out which VS outputs aren't used by the PS. */
-   uint64_t outputs_written = vs->outputs_written_before_ps;
+   uint64_t outputs_written = vs->info.outputs_written_before_ps;
   uint64_t linked = outputs_written & sctx->ps_inputs_read_or_disabled;

   key->ge.opt.kill_outputs = ~linked & outputs_written;
@@ -2185,7 +2185,7 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx)

   /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
   if (!key->ps.part.epilog.last_cbuf) {
-      key->ps.part.epilog.spi_shader_col_format &= sel->colors_written_4bit;
+      key->ps.part.epilog.spi_shader_col_format &= sel->info.colors_written_4bit;
      key->ps.part.epilog.color_is_int8 &= sel->info.colors_written;
      key->ps.part.epilog.color_is_int10 &= sel->info.colors_written;
   }
@@ -2196,7 +2196,7 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
    *
    * Dual source blending never has color buffer 1 enabled, so ignore it.
    */
-   if (sel->colors_written_4bit &
+   if (sel->info.colors_written_4bit &
       (blend->dual_src_blend ? 0xffffff0f : 0xffffffff) &
       ~(sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_enabled_4bit))
      key->ps.opt.prefer_mono = 1;
@@ -2944,7 +2944,7 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
                semantic != VARYING_SLOT_CLIP_VERTEX &&
                semantic != VARYING_SLOT_EDGE) {
               id = si_shader_io_get_unique_index(semantic, true);
-               sel->outputs_written_before_ps &= ~(1ull << id);
+               sel->info.outputs_written_before_ps &= ~(1ull << id);
            }
         }
      }
@@ -3024,7 +3024,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
   struct si_screen *sscreen = (struct si_screen *)ctx->screen;
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
-   int i;

   if (!sel)
      return NULL;
@@ -3040,7 +3039,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
      sel->nir = (nir_shader*)state->ir.nir;
   }

-   si_nir_scan_shader(sel->nir, &sel->info);
+   si_nir_scan_shader(sscreen, sel->nir, &sel->info);

   const enum pipe_shader_type type = pipe_shader_type_from_mesa(sel->info.stage);
   sel->pipe_shader_type = type;
@@ -3053,49 +3052,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
   si_get_active_slot_masks(&sel->info, &sel->active_const_and_shader_buffers,
                            &sel->active_samplers_and_images);

-   sel->num_vs_inputs =
-      sel->info.stage == MESA_SHADER_VERTEX && !sel->info.base.vs.blit_sgprs_amd
-         ? sel->info.num_inputs
-         : 0;
-   unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.chip_class);
-   sel->num_vbos_in_user_sgprs = MIN2(sel->num_vs_inputs, num_vbos_in_sgprs);
-
-   /* The prolog is a no-op if there are no inputs. */
-   sel->vs_needs_prolog = sel->info.stage == MESA_SHADER_VERTEX && sel->info.num_inputs &&
-                          !sel->info.base.vs.blit_sgprs_amd;
-
-   if (sel->info.stage == MESA_SHADER_VERTEX ||
-       sel->info.stage == MESA_SHADER_TESS_CTRL ||
-       sel->info.stage == MESA_SHADER_TESS_EVAL ||
-       sel->info.stage == MESA_SHADER_GEOMETRY) {
-      if (sel->info.stage == MESA_SHADER_TESS_CTRL) {
-         /* Always reserve space for these. */
-         sel->patch_outputs_written |=
-            (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER)) |
-            (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER));
-      }
-      for (i = 0; i < sel->info.num_outputs; i++) {
-         unsigned semantic = sel->info.output_semantic[i];
-
-         if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
-             semantic == VARYING_SLOT_TESS_LEVEL_OUTER ||
-             (semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX)) {
-            sel->patch_outputs_written |= 1ull << si_shader_io_get_unique_index_patch(semantic);
-         } else if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
-                    semantic != VARYING_SLOT_EDGE) {
-            sel->outputs_written |= 1ull << si_shader_io_get_unique_index(semantic, false);
-
-            /* Ignore outputs that are not passed from VS to PS. */
-            if (semantic != VARYING_SLOT_POS &&
-                semantic != VARYING_SLOT_PSIZ &&
-                semantic != VARYING_SLOT_CLIP_VERTEX) {
-               sel->outputs_written_before_ps |= 1ull
-                                                 << si_shader_io_get_unique_index(semantic, true);
-            }
-         }
-      }
-   }
-
   switch (sel->info.stage) {
   case MESA_SHADER_GEOMETRY:
      /* Only possibilities: POINTS, LINE_STRIP, TRIANGLES */
@@ -3103,11 +3059,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
      if (util_rast_prim_is_triangles(sel->rast_prim))
         sel->rast_prim = PIPE_PRIM_TRIANGLES;

-      sel->gsvs_vertex_size = sel->info.num_outputs * 16;
-      sel->max_gsvs_emit_size = sel->gsvs_vertex_size * sel->info.base.gs.vertices_out;
-      sel->gs_input_verts_per_prim =
-         u_vertices_per_prim((enum pipe_prim_type)sel->info.base.gs.input_primitive);
-
      /* EN_MAX_VERT_OUT_PER_GS_INSTANCE does not work with tesselation so
       * we can't split workgroups. Disable ngg if any of the following conditions is true:
       * - num_invocations * gs.vertices_out > 256
@@ -3120,30 +3071,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
      break;

   case MESA_SHADER_VERTEX:
-   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
-      sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
-      sel->lshs_vertex_stride = sel->esgs_itemsize;
-
-      /* Add 1 dword to reduce LDS bank conflicts, so that each vertex
-       * will start on a different bank. (except for the maximum 32*16).
-       */
-      if (sel->lshs_vertex_stride < 32 * 16)
-         sel->lshs_vertex_stride += 4;
-
-      /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
-       * conflicts, i.e. each vertex will start at a different bank.
-       */
-      if (sctx->chip_class >= GFX9)
-         sel->esgs_itemsize += 4;
-
-      assert(((sel->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0);
-
-      sel->tcs_vgpr_only_inputs = ~sel->info.base.tess.tcs_cross_invocation_inputs_read &
-                                  ~sel->info.base.inputs_read_indirectly &
-                                  sel->info.base.inputs_read;
-
-      /* Only for TES: */
      if (sel->info.stage == MESA_SHADER_TESS_EVAL) {
         if (sel->info.base.tess.point_mode)
            sel->rast_prim = PIPE_PRIM_POINTS;
@@ -3155,28 +3083,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
         sel->rast_prim = PIPE_PRIM_TRIANGLES;
      }
      break;
-
-   case MESA_SHADER_FRAGMENT:
-      for (i = 0; i < sel->info.num_inputs; i++) {
-         unsigned semantic = sel->info.input[i].semantic;
-
-         if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
-             semantic != VARYING_SLOT_PNTC) {
-            sel->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic, true);
-         }
-      }
-
-      for (i = 0; i < 8; i++)
-         if (sel->info.colors_written & (1 << i))
-            sel->colors_written_4bit |= 0xf << (4 * i);
-
-      for (i = 0; i < sel->info.num_inputs; i++) {
-         if (sel->info.input[i].semantic == VARYING_SLOT_COL0)
-            sel->color_attr_index[0] = i;
-         else if (sel->info.input[i].semantic == VARYING_SLOT_COL1)
-            sel->color_attr_index[1] = i;
-      }
-      break;
   default:;
   }

@@ -3208,63 +3114,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
      }
   }

-   sel->clipdist_mask = sel->info.writes_clipvertex ? SI_USER_CLIP_PLANE_MASK :
-                           u_bit_consecutive(0, sel->info.base.clip_distance_array_size);
-   sel->culldist_mask = u_bit_consecutive(0, sel->info.base.cull_distance_array_size) <<
-                        sel->info.base.clip_distance_array_size;
-
-   /* DB_SHADER_CONTROL */
-   sel->db_shader_control = S_02880C_Z_EXPORT_ENABLE(sel->info.writes_z) |
-                            S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(sel->info.writes_stencil) |
-                            S_02880C_MASK_EXPORT_ENABLE(sel->info.writes_samplemask) |
-                            S_02880C_KILL_ENABLE(sel->info.base.fs.uses_discard);
-
-   if (sel->info.stage == MESA_SHADER_FRAGMENT) {
-      switch (sel->info.base.fs.depth_layout) {
-      case FRAG_DEPTH_LAYOUT_GREATER:
-         sel->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
-         break;
-      case FRAG_DEPTH_LAYOUT_LESS:
-         sel->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
-         break;
-      default:;
-      }
-
-      /* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as following:
-       *
-       *   | early Z/S | writes_mem | allow_ReZ? |      Z_ORDER       | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP
-       * --|-----------|------------|------------|--------------------|-------------------|-------------
-       * 1a|   false   |   false    |   true     | EarlyZ_Then_ReZ    |         0         |     0
-       * 1b|   false   |   false    |   false    | EarlyZ_Then_LateZ  |         0         |     0
-       * 2 |   false   |   true     |   n/a      |       LateZ        |         1         |     0
-       * 3 |   true    |   false    |   n/a      | EarlyZ_Then_LateZ  |         0         |     0
-       * 4 |   true    |   true     |   n/a      | EarlyZ_Then_LateZ  |         0         |     1
-       *
-       * In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register.
-       * In case 2, NOOP_CULL is a don't care field. In case 2, 3 and 4, ReZ doesn't make sense.
-       *
-       * Don't use ReZ without profiling !!!
-       *
-       * ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex
-       * shaders.
-       */
-      if (sel->info.base.fs.early_fragment_tests) {
-         /* Cases 3, 4. */
-         sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) |
-                                   S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
-                                   S_02880C_EXEC_ON_NOOP(sel->info.base.writes_memory);
-      } else if (sel->info.base.writes_memory) {
-         /* Case 2. */
-         sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) | S_02880C_EXEC_ON_HIER_FAIL(1);
-      } else {
-         /* Case 1. */
-         sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
-      }
-
-      if (sel->info.base.fs.post_depth_coverage)
-         sel->db_shader_control |= S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(1);
-   }
-
   (void)simple_mtx_init(&sel->mutex, mtx_plain);

   si_schedule_initial_compile(sctx, sel->info.stage, &sel->ready, &sel->compiler_ctx_state,
@@ -3315,8 +3164,8 @@ static void si_update_clip_regs(struct si_context *sctx, struct si_shader_select
       (!old_hw_vs ||
        (old_hw_vs->info.stage == MESA_SHADER_VERTEX && old_hw_vs->info.base.vs.window_space_position) !=
        (next_hw_vs->info.stage == MESA_SHADER_VERTEX && next_hw_vs->info.base.vs.window_space_position) ||
-        old_hw_vs->clipdist_mask != next_hw_vs->clipdist_mask ||
-        old_hw_vs->culldist_mask != next_hw_vs->culldist_mask || !old_hw_vs_variant ||
+        old_hw_vs->info.clipdist_mask != next_hw_vs->info.clipdist_mask ||
+        old_hw_vs->info.culldist_mask != next_hw_vs->info.culldist_mask || !old_hw_vs_variant ||
        !next_hw_vs_variant ||
        old_hw_vs_variant->pa_cl_vs_out_cntl != next_hw_vs_variant->pa_cl_vs_out_cntl))
      si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
@@ -3383,7 +3232,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
   sctx->shader.vs.current = sel ? sel->first_variant : NULL;
   sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0;
   sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false;
-   sctx->fixed_func_tcs_shader.key.ge.mono.u.ff_tcs_inputs_to_copy = sel ? sel->outputs_written : 0;
+   sctx->fixed_func_tcs_shader.key.ge.mono.u.ff_tcs_inputs_to_copy = sel ? sel->info.outputs_written : 0;

   if (si_update_ngg(sctx))
      si_shader_change_notify(sctx);
@@ -3556,7 +3405,7 @@ void si_update_ps_kill_enable(struct si_context *sctx)
   if (!sctx->shader.ps.cso)
      return;

-   unsigned db_shader_control = sctx->shader.ps.cso->db_shader_control |
+   unsigned db_shader_control = sctx->shader.ps.cso->info.db_shader_control |
                                S_02880C_KILL_ENABLE(sctx->queued.named.dsa->alpha_func != PIPE_FUNC_ALWAYS);

   if (sctx->ps_db_shader_control != db_shader_control) {
@@ -3801,12 +3650,12 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
   unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;

   /* Calculate the minimum size. */
-   unsigned min_esgs_ring_size = align(es->esgs_itemsize * gs_vertex_reuse * wave_size, alignment);
+   unsigned min_esgs_ring_size = align(es->info.esgs_itemsize * gs_vertex_reuse * wave_size, alignment);

   /* These are recommended sizes, not minimum sizes. */
   unsigned esgs_ring_size =
-      max_gs_waves * 2 * wave_size * es->esgs_itemsize * gs->gs_input_verts_per_prim;
-   unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs->max_gsvs_emit_size;
+      max_gs_waves * 2 * wave_size * es->info.esgs_itemsize * gs->info.gs_input_verts_per_prim;
+   unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs->info.max_gsvs_emit_size;

   min_esgs_ring_size = align(min_esgs_ring_size, alignment);
   esgs_ring_size = align(esgs_ring_size, alignment);