nir: Extract shader_info->cs.shared_size out of union.

It is valid for all stages, just 0 for most of them. In particular,
mesh/task shaders might use it.

Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10094>
Author: Bas Nieuwenhuizen
Date:   2021-04-08 12:30:14 +02:00
Committed-by: Marge Bot
Parent: afd2f489d3
Commit: 580f1ac473

25 changed files with 44 additions and 44 deletions
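
The heart of the change is the shader_info struct: shared_size moves out
of the stage-specific union (where it was only valid while the cs member
was active) into the common part of the struct, so every stage can carry
a shared-memory size. A minimal compilable sketch of the before/after
layout (this is not the real Mesa definition; fields other than
shared_size are reduced to a couple of examples taken from the diffs
below):

   #include <stdbool.h>

   /* Before: reading shared_size required knowing that the cs union
    * member was the active one, i.e. a compute shader. */
   typedef struct shader_info_before {
      union {
         struct {
            unsigned shared_size;      /* compute only */
            bool local_size_variable;
         } cs;
         struct {
            unsigned vertices_out;
         } gs;
      };
   } shader_info_before;

   /* After: shared_size is valid for all stages (just 0 for most), so
    * mesh/task shaders can use it without activating the cs member. */
   typedef struct shader_info_after {
      unsigned shared_size;
      union {
         struct {
            bool local_size_variable;  /* compute-specific bits stay */
         } cs;
         struct {
            unsigned vertices_out;
         } gs;
      };
   } shader_info_after;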


@@ -474,7 +474,7 @@ setup_variables(isel_context *ctx, nir_shader *nir)
       break;
    }
    case MESA_SHADER_COMPUTE: {
-      ctx->program->config->lds_size = DIV_ROUND_UP(nir->info.cs.shared_size, ctx->program->dev.lds_encoding_granule);
+      ctx->program->config->lds_size = DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule);
       break;
    }
    case MESA_SHADER_VERTEX: {


@@ -5083,7 +5083,7 @@ static void setup_shared(struct ac_nir_context *ctx, struct nir_shader *nir)
    if (ctx->ac.lds)
       return;

-   LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, nir->info.cs.shared_size);
+   LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, nir->info.shared_size);
    LLVMValueRef lds =
       LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "compute_lds", AC_ADDR_SPACE_LDS);


@@ -657,9 +657,9 @@ radv_shader_compile_to_nir(struct radv_device *device,
              nir_var_mem_shared, nir_address_format_32bit_offset);

    if (nir->info.cs.zero_initialize_shared_memory &&
-       nir->info.cs.shared_size > 0) {
+       nir->info.shared_size > 0) {
       const unsigned chunk_size = 16; /* max single store size */
-      const unsigned shared_size = ALIGN(nir->info.cs.shared_size, chunk_size);
+      const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
       NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
                  shared_size, chunk_size);
    }
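
Both radv (above) and anv (in its hunk further down) round the shared
size up to a 16-byte chunk before running nir_zero_initialize_shared_memory,
since the pass zeroes the region in chunk_size-byte stores. A minimal,
standalone check of that rounding, with ALIGN written out as the usual
power-of-two round-up (an assumption: this matches Mesa's macro in
spirit, not necessarily letter-for-letter):

   #include <assert.h>

   #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

   int main(void) {
      const unsigned chunk_size = 16;          /* max single store size */
      assert(ALIGN(100u, chunk_size) == 112);  /* 100 B -> 7 stores */
      assert(ALIGN(112u, chunk_size) == 112);  /* already aligned */
      return 0;
   }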


@@ -3642,7 +3642,7 @@ nir_to_vir(struct v3d_compile *c)
          ffs(util_next_power_of_two(MAX2(wg_size, 64))) - 1;
       assert(c->local_invocation_index_bits <= 8);

-      if (c->s->info.cs.shared_size) {
+      if (c->s->info.shared_size) {
          struct qreg wg_in_mem = vir_SHR(c, c->cs_payload[1],
                                          vir_uniform_ui(c, 16));
          if (c->s->info.cs.local_size[0] != 1 ||
@@ -3655,7 +3655,7 @@ nir_to_vir(struct v3d_compile *c)
                               vir_uniform_ui(c, wg_mask));
          }
          struct qreg shared_per_wg =
-            vir_uniform_ui(c, c->s->info.cs.shared_size);
+            vir_uniform_ui(c, c->s->info.shared_size);

          c->cs_shared_offset =
             vir_ADD(c,


@@ -95,7 +95,7 @@ lower_shared(struct v3d_compile *c,
 {
    b->cursor = nir_before_instr(&instr->instr);
    nir_ssa_def *aligned_size =
-      nir_imm_int(b, c->s->info.cs.shared_size & 0xfffffffc);
+      nir_imm_int(b, c->s->info.shared_size & 0xfffffffc);
    nir_ssa_def *offset = nir_umin(b, instr->src[0].ssa, aligned_size);
    nir_instr_rewrite_src(&instr->instr, &instr->src[0],
                          nir_src_for_ssa(offset));


@@ -793,7 +793,7 @@ static void
 v3d_cs_set_prog_data(struct v3d_compile *c,
                      struct v3d_compute_prog_data *prog_data)
 {
-   prog_data->shared_size = c->s->info.cs.shared_size;
+   prog_data->shared_size = c->s->info.shared_size;

    prog_data->local_size[0] = c->s->info.cs.local_size[0];
    prog_data->local_size[1] = c->s->info.cs.local_size[1];


@@ -2288,7 +2288,7 @@ lower_vars_to_explicit(nir_shader *shader,
       shader->scratch_size = offset;
       break;
    case nir_var_mem_shared:
-      shader->info.cs.shared_size = offset;
+      shader->info.shared_size = offset;
       shader->shared_size = offset;
       break;
    case nir_var_mem_constant:


@@ -1629,7 +1629,7 @@ nir_print_shader_annotated(nir_shader *shader, FILE *fp,
              shader->info.cs.local_size[1],
              shader->info.cs.local_size[2],
              shader->info.cs.local_size_variable ? " (variable)" : "");
-      fprintf(fp, "shared-size: %u\n", shader->info.cs.shared_size);
+      fprintf(fp, "shared-size: %u\n", shader->info.shared_size);
    }

    fprintf(fp, "inputs: %u\n", shader->num_inputs);


@@ -183,6 +183,11 @@ typedef struct shader_info {
    /* SPV_KHR_float_controls: execution mode for floating point ops */
    uint16_t float_controls_execution_mode;

+   /**
+    * Size of shared variables accessed by compute/task/mesh shaders.
+    */
+   unsigned shared_size;
+
    uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
    uint8_t num_inlinable_uniforms:4;

@@ -377,11 +382,6 @@ typedef struct shader_info {
          bool zero_initialize_shared_memory;

-         /**
-          * Size of shared variables accessed by the compute shader.
-          */
-         unsigned shared_size;
-
          /**
           * pointer size is:
           *    AddressingModelLogical: 0 (default)


@@ -6066,7 +6066,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
          const bool align_to_stride = false;
          size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
       }
-      b->shader->info.cs.shared_size = size;
+      b->shader->info.shared_size = size;
       b->shader->shared_size = size;
    }


@@ -2360,7 +2360,7 @@ ureg_setup_compute_shader(struct ureg_program *ureg,
    ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
                  info->cs.local_size[2]);

-   if (info->cs.shared_size)
+   if (info->shared_size)
       ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
 }


@@ -456,10 +456,10 @@ llvmpipe_create_compute_state(struct pipe_context *pipe,
       shader->base.type = PIPE_SHADER_IR_NIR;
       pipe->screen->finalize_nir(pipe->screen, shader->base.ir.nir, false);
-      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.cs.shared_size;
+      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
    } else if (templ->ir_type == PIPE_SHADER_IR_NIR) {
       shader->base.ir.nir = (struct nir_shader *)templ->prog;
-      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.cs.shared_size;
+      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
    }
    if (shader->base.type == PIPE_SHADER_IR_TGSI) {
       /* get/save the summary info for this shader */


@@ -1292,7 +1292,7 @@ Converter::parseNIR()
       info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
       info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
       info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
-      info_out->bin.smemSize += nir->info.cs.shared_size;
+      info_out->bin.smemSize += nir->info.shared_size;
       break;
    case Program::TYPE_FRAGMENT:
       info_out->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;


@@ -237,7 +237,7 @@ static void *si_create_compute_state(struct pipe_context *ctx, const struct pipe
       si_const_and_shader_buffer_descriptors_idx(PIPE_SHADER_COMPUTE);
    sel->sampler_and_images_descriptors_index =
       si_sampler_and_image_descriptors_idx(PIPE_SHADER_COMPUTE);
-   sel->info.base.cs.shared_size = cso->req_local_mem;
+   sel->info.base.shared_size = cso->req_local_mem;
    program->shader.selector = &program->sel;
    program->ir_type = cso->ir_type;
    program->private_size = cso->req_private_mem;
@@ -481,9 +481,9 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
        * tracker, then we will set LDS_SIZE to 512 bytes rather than 256.
        */
       if (sctx->chip_class <= GFX6) {
-         lds_blocks += align(program->sel.info.base.cs.shared_size, 256) >> 8;
+         lds_blocks += align(program->sel.info.base.shared_size, 256) >> 8;
       } else {
-         lds_blocks += align(program->sel.info.base.cs.shared_size, 512) >> 9;
+         lds_blocks += align(program->sel.info.base.shared_size, 512) >> 9;
       }
       /* TODO: use si_multiwave_lds_size_workaround */
@@ -625,7 +625,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_
    dispatch.grid_size_z = util_cpu_to_le32(info->grid[2] * info->block[2]);
    dispatch.private_segment_size = util_cpu_to_le32(program->private_size);
-   dispatch.group_segment_size = util_cpu_to_le32(program->sel.info.base.cs.shared_size);
+   dispatch.group_segment_size = util_cpu_to_le32(program->sel.info.base.shared_size);
    dispatch.kernarg_address = util_cpu_to_le64(kernel_args_va);
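
The lds_blocks arithmetic above converts a byte count into the
hardware's LDS allocation granule: 256-byte blocks up to GFX6, 512-byte
blocks on newer chips. A hypothetical sanity check of that math, with a
local power-of-two round-up standing in for Mesa's align():

   #include <assert.h>

   static unsigned align_up(unsigned v, unsigned a) {
      return (v + a - 1) & ~(a - 1);
   }

   int main(void) {
      const unsigned shared_size = 1000;               /* invented example */
      assert((align_up(shared_size, 256) >> 8) == 4);  /* GFX6:  4 x 256 B */
      assert((align_up(shared_size, 512) >> 9) == 2);  /* GFX7+: 2 x 512 B */
      return 0;
   }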


@@ -426,7 +426,7 @@ static LLVMValueRef si_llvm_get_block_size(struct ac_shader_abi *abi)
 static void si_llvm_declare_compute_memory(struct si_shader_context *ctx)
 {
    struct si_shader_selector *sel = ctx->shader->selector;
-   unsigned lds_size = sel->info.base.cs.shared_size;
+   unsigned lds_size = sel->info.base.shared_size;
    LLVMTypeRef i8p = LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS);
    LLVMValueRef var;
@@ -487,7 +487,7 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *
                 nir->info.cs.user_data_components_amd);
    }

-   if (ctx->shader->selector->info.base.cs.shared_size)
+   if (ctx->shader->selector->info.base.shared_size)
       si_llvm_declare_compute_memory(ctx);
 }


@@ -3666,8 +3666,8 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
                                                s->info.gs.vertices_out);
       break;
    case MESA_SHADER_COMPUTE:
-      if (s->info.cs.shared_size)
-         create_shared_block(&ctx, s->info.cs.shared_size);
+      if (s->info.shared_size)
+         create_shared_block(&ctx, s->info.shared_size);
       if (s->info.cs.local_size[0] || s->info.cs.local_size[1] || s->info.cs.local_size[2])
          spirv_builder_emit_exec_mode_literal3(&ctx.builder, entry_point, SpvExecutionModeLocalSize,


@@ -674,7 +674,7 @@ lvp_pipeline_compile(struct lvp_pipeline *pipeline,
       struct pipe_compute_state shstate = {0};
       shstate.prog = (void *)pipeline->pipeline_nir[MESA_SHADER_COMPUTE];
       shstate.ir_type = PIPE_SHADER_IR_NIR;
-      shstate.req_local_mem = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.cs.shared_size;
+      shstate.req_local_mem = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.shared_size;
       pipeline->shader_cso[PIPE_SHADER_COMPUTE] = device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
    } else {
       struct pipe_shader_state shstate = {0};


@@ -9445,7 +9445,7 @@ brw_compile_cs(const struct brw_compiler *compiler,
    const bool debug_enabled = INTEL_DEBUG & DEBUG_CS;

    prog_data->base.stage = MESA_SHADER_COMPUTE;
-   prog_data->base.total_shared = nir->info.cs.shared_size;
+   prog_data->base.total_shared = nir->info.shared_size;

    /* Generate code for all the possible SIMD variants. */
    bool generate_all;


@@ -1717,15 +1717,15 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
               nir_var_mem_shared, nir_address_format_32bit_offset);

    if (stage.nir->info.cs.zero_initialize_shared_memory &&
-       stage.nir->info.cs.shared_size > 0) {
+       stage.nir->info.shared_size > 0) {
       /* The effective Shared Local Memory size is at least 1024 bytes and
        * is always rounded to a power of two, so it is OK to align the size
        * used by the shader to chunk_size -- which does simplify the logic.
        */
       const unsigned chunk_size = 16;
-      const unsigned shared_size = ALIGN(stage.nir->info.cs.shared_size, chunk_size);
+      const unsigned shared_size = ALIGN(stage.nir->info.shared_size, chunk_size);

       assert(shared_size <=
-             calculate_gen_slm_size(compiler->devinfo->ver, stage.nir->info.cs.shared_size));
+             calculate_gen_slm_size(compiler->devinfo->ver, stage.nir->info.shared_size));

       NIR_PASS_V(stage.nir, nir_zero_initialize_shared_memory,
                  shared_size, chunk_size);


@@ -92,7 +92,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
    memset(&prog_data, 0, sizeof(prog_data));

-   if (cp->program.info.cs.shared_size > 64 * 1024) {
+   if (cp->program.info.shared_size > 64 * 1024) {
       cp->program.sh.data->LinkStatus = LINKING_FAILURE;
       const char *error_str =
          "Compute shader used more than 64KB of shared variables";


@@ -2623,7 +2623,7 @@ _mesa_copy_linked_program_data(const struct gl_shader_program *src,
       break;
    }
    case MESA_SHADER_COMPUTE: {
-      dst->info.cs.shared_size = src->Comp.SharedSize;
+      dst->info.shared_size = src->Comp.SharedSize;
       break;
    }
    default:


@@ -548,7 +548,7 @@ st_create_nir_shader(struct st_context *st, struct pipe_shader_state *state)
    case MESA_SHADER_COMPUTE: {
       struct pipe_compute_state cs = {0};
       cs.ir_type = state->type;
-      cs.req_local_mem = nir->info.cs.shared_size;
+      cs.req_local_mem = nir->info.shared_size;

       if (state->type == PIPE_SHADER_IR_NIR)
          cs.prog = state->ir.nir;
@@ -890,7 +890,7 @@ st_create_common_variant(struct st_context *st,
    case MESA_SHADER_COMPUTE: {
       struct pipe_compute_state cs = {0};
       cs.ir_type = state.type;
-      cs.req_local_mem = stp->Base.info.cs.shared_size;
+      cs.req_local_mem = stp->Base.info.shared_size;

       if (state.type == PIPE_SHADER_IR_NIR)
          cs.prog = state.ir.nir;


@@ -1401,12 +1401,12 @@ clc_to_dxil(struct clc_context *ctx,
           */
          unsigned alignment = size < 128 ? (1 << (ffs(size) - 1)) : 128;

-         nir->info.cs.shared_size = align(nir->info.cs.shared_size, alignment);
-         metadata->args[i].localptr.sharedmem_offset = nir->info.cs.shared_size;
-         nir->info.cs.shared_size += size;
+         nir->info.shared_size = align(nir->info.shared_size, alignment);
+         metadata->args[i].localptr.sharedmem_offset = nir->info.shared_size;
+         nir->info.shared_size += size;
       }

-   metadata->local_mem_size = nir->info.cs.shared_size;
+   metadata->local_mem_size = nir->info.shared_size;
    metadata->priv_mem_size = nir->scratch_size;

    /* DXIL double math is too limited compared to what NIR expects. Let's refuse
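
The loop above packs local-pointer kernel arguments into the shared
region: each offset is first aligned to the argument's largest
power-of-two factor, capped at 128 (which is what 1 << (ffs(size) - 1)
computes), recorded as sharedmem_offset, and the running size then
grows by the argument size. A standalone walk-through of that
arithmetic with three invented argument sizes (illustration only, not
compiler code):

   #include <stdio.h>
   #include <strings.h>   /* ffs() */

   static unsigned align_up(unsigned v, unsigned a) {
      return (v + a - 1) & ~(a - 1);
   }

   int main(void) {
      unsigned shared_size = 0;
      const unsigned arg_sizes[] = {20, 64, 8};
      for (unsigned i = 0; i < 3; i++) {
         unsigned size = arg_sizes[i];
         unsigned alignment = size < 128 ? (1u << (ffs(size) - 1)) : 128;
         shared_size = align_up(shared_size, alignment);
         printf("arg %u: size %2u, alignment %2u -> offset %3u\n",
                i, size, alignment, shared_size);
         shared_size += size;
      }
      printf("local_mem_size = %u\n", shared_size);   /* 136 here */
      return 0;
   }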


@@ -4039,7 +4039,7 @@ emit_module(struct ntd_context *ctx, nir_shader *s, const struct nir_to_dxil_opt
       }
    }

-   if (s->info.cs.shared_size && shader_has_shared_ops(s)) {
+   if (s->info.shared_size && shader_has_shared_ops(s)) {
       const struct dxil_type *type;
       unsigned size;
@@ -4053,7 +4053,7 @@ emit_module(struct ntd_context *ctx, nir_shader *s, const struct nir_to_dxil_opt
        * pointer is in the groupshared address space, making the 32-bit -> 64-bit
        * pointer cast impossible.
        */
-      size = ALIGN_POT(s->info.cs.shared_size, sizeof(uint32_t));
+      size = ALIGN_POT(s->info.shared_size, sizeof(uint32_t));
       type = dxil_module_get_array_type(&ctx->mod,
                                         dxil_module_get_int_type(&ctx->mod, 32),
                                         size / sizeof(uint32_t));


@@ -217,7 +217,7 @@ pan_shader_compile(const struct panfrost_device *dev,
                               &info->varyings.input_count);
       break;
    case MESA_SHADER_COMPUTE:
-      info->wls_size = s->info.cs.shared_size;
+      info->wls_size = s->info.shared_size;
       break;
    default:
       unreachable("Unknown shader state");