nir: Extract shader_info->cs.shared_size out of union.

It is valid for all stages, just 0 for most of them. In particular,
mesh/task shaders might use it.

Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10094>
Author: Bas Nieuwenhuizen
Date:   2021-04-08 12:30:14 +02:00
Committed-by: Marge Bot
Parent: afd2f489d3
Commit: 580f1ac473

25 changed files with 44 additions and 44 deletions
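
The heart of the change is the shader_info struct: shared_size moves out
of the stage-specific union (where it was only valid while the cs member
was active) into the common part of the struct, so every stage can carry
a shared-memory size. A minimal compilable sketch of the before/after
layout (this is not the real Mesa definition; fields other than
shared_size are reduced to a couple of examples taken from the diffs
below):

   #include <stdbool.h>

   /* Before: reading shared_size required knowing that the cs union
    * member was the active one, i.e. a compute shader. */
   typedef struct shader_info_before {
      union {
         struct {
            unsigned shared_size;      /* compute only */
            bool local_size_variable;
         } cs;
         struct {
            unsigned vertices_out;
         } gs;
      };
   } shader_info_before;

   /* After: shared_size is valid for all stages (just 0 for most), so
    * mesh/task shaders can use it without activating the cs member. */
   typedef struct shader_info_after {
      unsigned shared_size;
      union {
         struct {
            bool local_size_variable;  /* compute-specific bits stay */
         } cs;
         struct {
            unsigned vertices_out;
         } gs;
      };
   } shader_info_after;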


@@ -474,7 +474,7 @@ setup_variables(isel_context *ctx, nir_shader *nir)
       break;
    }
    case MESA_SHADER_COMPUTE: {
-      ctx->program->config->lds_size = DIV_ROUND_UP(nir->info.cs.shared_size, ctx->program->dev.lds_encoding_granule);
+      ctx->program->config->lds_size = DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule);
       break;
    }
    case MESA_SHADER_VERTEX: {


@@ -5083,7 +5083,7 @@ static void setup_shared(struct ac_nir_context *ctx, struct nir_shader *nir)
    if (ctx->ac.lds)
       return;

-   LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, nir->info.cs.shared_size);
+   LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, nir->info.shared_size);
    LLVMValueRef lds =
       LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "compute_lds", AC_ADDR_SPACE_LDS);


@@ -657,9 +657,9 @@ radv_shader_compile_to_nir(struct radv_device *device,
              nir_var_mem_shared, nir_address_format_32bit_offset);

    if (nir->info.cs.zero_initialize_shared_memory &&
-       nir->info.cs.shared_size > 0) {
+       nir->info.shared_size > 0) {
       const unsigned chunk_size = 16; /* max single store size */
-      const unsigned shared_size = ALIGN(nir->info.cs.shared_size, chunk_size);
+      const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
       NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
                  shared_size, chunk_size);
    }
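
Both radv (above) and anv (in its hunk further down) round the shared
size up to a 16-byte chunk before running nir_zero_initialize_shared_memory,
since the pass zeroes the region in chunk_size-byte stores. A minimal,
standalone check of that rounding, with ALIGN written out as the usual
power-of-two round-up (an assumption: this matches Mesa's macro in
spirit, not necessarily letter-for-letter):

   #include <assert.h>

   #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

   int main(void) {
      const unsigned chunk_size = 16;          /* max single store size */
      assert(ALIGN(100u, chunk_size) == 112);  /* 100 B -> 7 stores */
      assert(ALIGN(112u, chunk_size) == 112);  /* already aligned */
      return 0;
   }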


@@ -3642,7 +3642,7 @@ nir_to_vir(struct v3d_compile *c)
          ffs(util_next_power_of_two(MAX2(wg_size, 64))) - 1;
       assert(c->local_invocation_index_bits <= 8);

-      if (c->s->info.cs.shared_size) {
+      if (c->s->info.shared_size) {
          struct qreg wg_in_mem = vir_SHR(c, c->cs_payload[1],
                                          vir_uniform_ui(c, 16));
          if (c->s->info.cs.local_size[0] != 1 ||
@@ -3655,7 +3655,7 @@ nir_to_vir(struct v3d_compile *c)
                               vir_uniform_ui(c, wg_mask));
          }
          struct qreg shared_per_wg =
-            vir_uniform_ui(c, c->s->info.cs.shared_size);
+            vir_uniform_ui(c, c->s->info.shared_size);

          c->cs_shared_offset =
             vir_ADD(c,


@@ -95,7 +95,7 @@ lower_shared(struct v3d_compile *c,
 {
    b->cursor = nir_before_instr(&instr->instr);
    nir_ssa_def *aligned_size =
-      nir_imm_int(b, c->s->info.cs.shared_size & 0xfffffffc);
+      nir_imm_int(b, c->s->info.shared_size & 0xfffffffc);
    nir_ssa_def *offset = nir_umin(b, instr->src[0].ssa, aligned_size);
    nir_instr_rewrite_src(&instr->instr, &instr->src[0],
                          nir_src_for_ssa(offset));


@@ -793,7 +793,7 @@ static void
 v3d_cs_set_prog_data(struct v3d_compile *c,
                      struct v3d_compute_prog_data *prog_data)
 {
-   prog_data->shared_size = c->s->info.cs.shared_size;
+   prog_data->shared_size = c->s->info.shared_size;

    prog_data->local_size[0] = c->s->info.cs.local_size[0];
    prog_data->local_size[1] = c->s->info.cs.local_size[1];


@@ -2288,7 +2288,7 @@ lower_vars_to_explicit(nir_shader *shader,
       shader->scratch_size = offset;
       break;
    case nir_var_mem_shared:
-      shader->info.cs.shared_size = offset;
+      shader->info.shared_size = offset;
       shader->shared_size = offset;
       break;
    case nir_var_mem_constant:


@@ -1629,7 +1629,7 @@ nir_print_shader_annotated(nir_shader *shader, FILE *fp,
              shader->info.cs.local_size[1],
              shader->info.cs.local_size[2],
              shader->info.cs.local_size_variable ? " (variable)" : "");
-      fprintf(fp, "shared-size: %u\n", shader->info.cs.shared_size);
+      fprintf(fp, "shared-size: %u\n", shader->info.shared_size);
    }

    fprintf(fp, "inputs: %u\n", shader->num_inputs);


@@ -183,6 +183,11 @@ typedef struct shader_info {
    /* SPV_KHR_float_controls: execution mode for floating point ops */
    uint16_t float_controls_execution_mode;

+   /**
+    * Size of shared variables accessed by compute/task/mesh shaders.
+    */
+   unsigned shared_size;
+
    uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
    uint8_t num_inlinable_uniforms:4;

@@ -377,11 +382,6 @@ typedef struct shader_info {
          bool zero_initialize_shared_memory;

-         /**
-          * Size of shared variables accessed by the compute shader.
-          */
-         unsigned shared_size;
-
          /**
           * pointer size is:
           *    AddressingModelLogical: 0 (default)


@@ -6066,7 +6066,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
          const bool align_to_stride = false;
          size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
       }
-      b->shader->info.cs.shared_size = size;
+      b->shader->info.shared_size = size;
       b->shader->shared_size = size;
    }


@@ -2360,7 +2360,7 @@ ureg_setup_compute_shader(struct ureg_program *ureg,
    ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
                  info->cs.local_size[2]);

-   if (info->cs.shared_size)
+   if (info->shared_size)
       ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
 }


@@ -456,10 +456,10 @@ llvmpipe_create_compute_state(struct pipe_context *pipe,
       shader->base.type = PIPE_SHADER_IR_NIR;
       pipe->screen->finalize_nir(pipe->screen, shader->base.ir.nir, false);
-      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.cs.shared_size;
+      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
    } else if (templ->ir_type == PIPE_SHADER_IR_NIR) {
       shader->base.ir.nir = (struct nir_shader *)templ->prog;
-      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.cs.shared_size;
+      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
    }
    if (shader->base.type == PIPE_SHADER_IR_TGSI) {
       /* get/save the summary info for this shader */


@@ -1292,7 +1292,7 @@ Converter::parseNIR()
       info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
       info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
       info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
-      info_out->bin.smemSize += nir->info.cs.shared_size;
+      info_out->bin.smemSize += nir->info.shared_size;
       break;
    case Program::TYPE_FRAGMENT:
       info_out->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;


@@ -237,7 +237,7 @@ static void *si_create_compute_state(struct pipe_context *ctx, const struct pipe
       si_const_and_shader_buffer_descriptors_idx(PIPE_SHADER_COMPUTE);
    sel->sampler_and_images_descriptors_index =
       si_sampler_and_image_descriptors_idx(PIPE_SHADER_COMPUTE);
-   sel->info.base.cs.shared_size = cso->req_local_mem;
+   sel->info.base.shared_size = cso->req_local_mem;
    program->shader.selector = &program->sel;
    program->ir_type = cso->ir_type;
    program->private_size = cso->req_private_mem;
@@ -481,9 +481,9 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
        * tracker, then we will set LDS_SIZE to 512 bytes rather than 256.
        */
       if (sctx->chip_class <= GFX6) {
-         lds_blocks += align(program->sel.info.base.cs.shared_size, 256) >> 8;
+         lds_blocks += align(program->sel.info.base.shared_size, 256) >> 8;
       } else {
-         lds_blocks += align(program->sel.info.base.cs.shared_size, 512) >> 9;
+         lds_blocks += align(program->sel.info.base.shared_size, 512) >> 9;
       }
       /* TODO: use si_multiwave_lds_size_workaround */
@@ -625,7 +625,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_
    dispatch.grid_size_z = util_cpu_to_le32(info->grid[2] * info->block[2]);
    dispatch.private_segment_size = util_cpu_to_le32(program->private_size);
-   dispatch.group_segment_size = util_cpu_to_le32(program->sel.info.base.cs.shared_size);
+   dispatch.group_segment_size = util_cpu_to_le32(program->sel.info.base.shared_size);
    dispatch.kernarg_address = util_cpu_to_le64(kernel_args_va);
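
The lds_blocks arithmetic above converts a byte count into the
hardware's LDS allocation granule: 256-byte blocks up to GFX6, 512-byte
blocks on newer chips. A hypothetical sanity check of that math, with a
local power-of-two round-up standing in for Mesa's align():

   #include <assert.h>

   static unsigned align_up(unsigned v, unsigned a) {
      return (v + a - 1) & ~(a - 1);
   }

   int main(void) {
      const unsigned shared_size = 1000;               /* invented example */
      assert((align_up(shared_size, 256) >> 8) == 4);  /* GFX6:  4 x 256 B */
      assert((align_up(shared_size, 512) >> 9) == 2);  /* GFX7+: 2 x 512 B */
      return 0;
   }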


@@ -426,7 +426,7 @@ static LLVMValueRef si_llvm_get_block_size(struct ac_shader_abi *abi)
 static void si_llvm_declare_compute_memory(struct si_shader_context *ctx)
 {
    struct si_shader_selector *sel = ctx->shader->selector;
-   unsigned lds_size = sel->info.base.cs.shared_size;
+   unsigned lds_size = sel->info.base.shared_size;
    LLVMTypeRef i8p = LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS);
    LLVMValueRef var;
@@ -487,7 +487,7 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *
                 nir->info.cs.user_data_components_amd);
    }

-   if (ctx->shader->selector->info.base.cs.shared_size)
+   if (ctx->shader->selector->info.base.shared_size)
       si_llvm_declare_compute_memory(ctx);
 }


@@ -3666,8 +3666,8 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
                                                s->info.gs.vertices_out);
       break;
    case MESA_SHADER_COMPUTE:
-      if (s->info.cs.shared_size)
-         create_shared_block(&ctx, s->info.cs.shared_size);
+      if (s->info.shared_size)
+         create_shared_block(&ctx, s->info.shared_size);
       if (s->info.cs.local_size[0] || s->info.cs.local_size[1] || s->info.cs.local_size[2])
          spirv_builder_emit_exec_mode_literal3(&ctx.builder, entry_point, SpvExecutionModeLocalSize,


@@ -674,7 +674,7 @@ lvp_pipeline_compile(struct lvp_pipeline *pipeline,
       struct pipe_compute_state shstate = {0};
       shstate.prog = (void *)pipeline->pipeline_nir[MESA_SHADER_COMPUTE];
       shstate.ir_type = PIPE_SHADER_IR_NIR;
-      shstate.req_local_mem = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.cs.shared_size;
+      shstate.req_local_mem = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.shared_size;
       pipeline->shader_cso[PIPE_SHADER_COMPUTE] = device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
    } else {
       struct pipe_shader_state shstate = {0};


@@ -9445,7 +9445,7 @@ brw_compile_cs(const struct brw_compiler *compiler,
    const bool debug_enabled = INTEL_DEBUG & DEBUG_CS;

    prog_data->base.stage = MESA_SHADER_COMPUTE;
-   prog_data->base.total_shared = nir->info.cs.shared_size;
+   prog_data->base.total_shared = nir->info.shared_size;

    /* Generate code for all the possible SIMD variants. */
    bool generate_all;


@@ -1717,15 +1717,15 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
               nir_var_mem_shared, nir_address_format_32bit_offset);

    if (stage.nir->info.cs.zero_initialize_shared_memory &&
-       stage.nir->info.cs.shared_size > 0) {
+       stage.nir->info.shared_size > 0) {
       /* The effective Shared Local Memory size is at least 1024 bytes and
        * is always rounded to a power of two, so it is OK to align the size
        * used by the shader to chunk_size -- which does simplify the logic.
        */
       const unsigned chunk_size = 16;
-      const unsigned shared_size = ALIGN(stage.nir->info.cs.shared_size, chunk_size);
+      const unsigned shared_size = ALIGN(stage.nir->info.shared_size, chunk_size);

       assert(shared_size <=
-             calculate_gen_slm_size(compiler->devinfo->ver, stage.nir->info.cs.shared_size));
+             calculate_gen_slm_size(compiler->devinfo->ver, stage.nir->info.shared_size));

       NIR_PASS_V(stage.nir, nir_zero_initialize_shared_memory,
                  shared_size, chunk_size);


@@ -92,7 +92,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
    memset(&prog_data, 0, sizeof(prog_data));

-   if (cp->program.info.cs.shared_size > 64 * 1024) {
+   if (cp->program.info.shared_size > 64 * 1024) {
       cp->program.sh.data->LinkStatus = LINKING_FAILURE;
       const char *error_str =
          "Compute shader used more than 64KB of shared variables";


@@ -2623,7 +2623,7 @@ _mesa_copy_linked_program_data(const struct gl_shader_program *src,
       break;
    }
    case MESA_SHADER_COMPUTE: {
-      dst->info.cs.shared_size = src->Comp.SharedSize;
+      dst->info.shared_size = src->Comp.SharedSize;
       break;
    }
    default:


@@ -548,7 +548,7 @@ st_create_nir_shader(struct st_context *st, struct pipe_shader_state *state)
    case MESA_SHADER_COMPUTE: {
       struct pipe_compute_state cs = {0};
       cs.ir_type = state->type;
-      cs.req_local_mem = nir->info.cs.shared_size;
+      cs.req_local_mem = nir->info.shared_size;

       if (state->type == PIPE_SHADER_IR_NIR)
          cs.prog = state->ir.nir;
@@ -890,7 +890,7 @@ st_create_common_variant(struct st_context *st,
    case MESA_SHADER_COMPUTE: {
       struct pipe_compute_state cs = {0};
       cs.ir_type = state.type;
-      cs.req_local_mem = stp->Base.info.cs.shared_size;
+      cs.req_local_mem = stp->Base.info.shared_size;

       if (state.type == PIPE_SHADER_IR_NIR)
          cs.prog = state.ir.nir;


@@ -1401,12 +1401,12 @@ clc_to_dxil(struct clc_context *ctx,
           */
          unsigned alignment = size < 128 ? (1 << (ffs(size) - 1)) : 128;

-         nir->info.cs.shared_size = align(nir->info.cs.shared_size, alignment);
-         metadata->args[i].localptr.sharedmem_offset = nir->info.cs.shared_size;
-         nir->info.cs.shared_size += size;
+         nir->info.shared_size = align(nir->info.shared_size, alignment);
+         metadata->args[i].localptr.sharedmem_offset = nir->info.shared_size;
+         nir->info.shared_size += size;
       }

-   metadata->local_mem_size = nir->info.cs.shared_size;
+   metadata->local_mem_size = nir->info.shared_size;
    metadata->priv_mem_size = nir->scratch_size;

    /* DXIL double math is too limited compared to what NIR expects. Let's refuse
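
The loop above packs local-pointer kernel arguments into the shared
region: each offset is first aligned to the argument's largest
power-of-two factor, capped at 128 (which is what 1 << (ffs(size) - 1)
computes), recorded as sharedmem_offset, and the running size then
grows by the argument size. A standalone walk-through of that
arithmetic with three invented argument sizes (illustration only, not
compiler code):

   #include <stdio.h>
   #include <strings.h>   /* ffs() */

   static unsigned align_up(unsigned v, unsigned a) {
      return (v + a - 1) & ~(a - 1);
   }

   int main(void) {
      unsigned shared_size = 0;
      const unsigned arg_sizes[] = {20, 64, 8};
      for (unsigned i = 0; i < 3; i++) {
         unsigned size = arg_sizes[i];
         unsigned alignment = size < 128 ? (1u << (ffs(size) - 1)) : 128;
         shared_size = align_up(shared_size, alignment);
         printf("arg %u: size %2u, alignment %2u -> offset %3u\n",
                i, size, alignment, shared_size);
         shared_size += size;
      }
      printf("local_mem_size = %u\n", shared_size);   /* 136 here */
      return 0;
   }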


@@ -4039,7 +4039,7 @@ emit_module(struct ntd_context *ctx, nir_shader *s, const struct nir_to_dxil_opt
       }
    }

-   if (s->info.cs.shared_size && shader_has_shared_ops(s)) {
+   if (s->info.shared_size && shader_has_shared_ops(s)) {
       const struct dxil_type *type;
       unsigned size;
@@ -4053,7 +4053,7 @@ emit_module(struct ntd_context *ctx, nir_shader *s, const struct nir_to_dxil_opt
        * pointer is in the groupshared address space, making the 32-bit -> 64-bit
        * pointer cast impossible.
        */
-      size = ALIGN_POT(s->info.cs.shared_size, sizeof(uint32_t));
+      size = ALIGN_POT(s->info.shared_size, sizeof(uint32_t));
       type = dxil_module_get_array_type(&ctx->mod,
                                         dxil_module_get_int_type(&ctx->mod, 32),
                                         size / sizeof(uint32_t));


@@ -217,7 +217,7 @@ pan_shader_compile(const struct panfrost_device *dev,
                               &info->varyings.input_count);
       break;
    case MESA_SHADER_COMPUTE:
-      info->wls_size = s->info.cs.shared_size;
+      info->wls_size = s->info.shared_size;
       break;
    default:
       unreachable("Unknown shader state");