nir: Extract shader_info->cs.shared_size out of union.
It is valid for all stages, just 0 for most of them. In particular,
mesh/task shaders might be using it.

Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10094>

commit 580f1ac473
parent afd2f489d3
committed by Marge Bot
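
To make the layout change concrete, here is a rough sketch of shader_info
before and after (a simplified, hypothetical struct for illustration — the
real one carries many more fields; only shared_size, its doc comment, and
zero_initialize_shared_memory come from the diff below):

    #include <stdbool.h>

    /* Trimmed-down sketch; not the actual Mesa definition. */
    typedef struct shader_info {
       /**
        * Size of shared variables accessed by compute/task/mesh shaders.
        * Moved out of the union: valid for all stages, just 0 for most.
        */
       unsigned shared_size;

       union {
          struct {
             bool zero_initialize_shared_memory;
             /* 'unsigned shared_size' was removed from here; inside the
              * union it was only meaningful while the union was read as
              * the compute-shader member. */
          } cs;
          /* ... per-stage structs for the other shader stages ... */
       };
    } shader_info;

Accordingly, every consumer below changes mechanically from
info.cs.shared_size to info.shared_size.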
@@ -474,7 +474,7 @@ setup_variables(isel_context *ctx, nir_shader *nir)
       break;
    }
    case MESA_SHADER_COMPUTE: {
-      ctx->program->config->lds_size = DIV_ROUND_UP(nir->info.cs.shared_size, ctx->program->dev.lds_encoding_granule);
+      ctx->program->config->lds_size = DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule);
       break;
    }
    case MESA_SHADER_VERTEX: {
@@ -5083,7 +5083,7 @@ static void setup_shared(struct ac_nir_context *ctx, struct nir_shader *nir)
    if (ctx->ac.lds)
       return;

-   LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, nir->info.cs.shared_size);
+   LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, nir->info.shared_size);

    LLVMValueRef lds =
       LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "compute_lds", AC_ADDR_SPACE_LDS);
@@ -657,9 +657,9 @@ radv_shader_compile_to_nir(struct radv_device *device,
               nir_var_mem_shared, nir_address_format_32bit_offset);

    if (nir->info.cs.zero_initialize_shared_memory &&
-       nir->info.cs.shared_size > 0) {
+       nir->info.shared_size > 0) {
       const unsigned chunk_size = 16; /* max single store size */
-      const unsigned shared_size = ALIGN(nir->info.cs.shared_size, chunk_size);
+      const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
       NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
                  shared_size, chunk_size);
    }
@@ -3642,7 +3642,7 @@ nir_to_vir(struct v3d_compile *c)
                 ffs(util_next_power_of_two(MAX2(wg_size, 64))) - 1;
         assert(c->local_invocation_index_bits <= 8);

-        if (c->s->info.cs.shared_size) {
+        if (c->s->info.shared_size) {
                 struct qreg wg_in_mem = vir_SHR(c, c->cs_payload[1],
                                                 vir_uniform_ui(c, 16));
                 if (c->s->info.cs.local_size[0] != 1 ||
@@ -3655,7 +3655,7 @@ nir_to_vir(struct v3d_compile *c)
                                          vir_uniform_ui(c, wg_mask));
                 }
                 struct qreg shared_per_wg =
-                        vir_uniform_ui(c, c->s->info.cs.shared_size);
+                        vir_uniform_ui(c, c->s->info.shared_size);

                 c->cs_shared_offset =
                         vir_ADD(c,
@@ -95,7 +95,7 @@ lower_shared(struct v3d_compile *c,
 {
         b->cursor = nir_before_instr(&instr->instr);
         nir_ssa_def *aligned_size =
-                nir_imm_int(b, c->s->info.cs.shared_size & 0xfffffffc);
+                nir_imm_int(b, c->s->info.shared_size & 0xfffffffc);
         nir_ssa_def *offset = nir_umin(b, instr->src[0].ssa, aligned_size);
         nir_instr_rewrite_src(&instr->instr, &instr->src[0],
                               nir_src_for_ssa(offset));
@@ -793,7 +793,7 @@ static void
 v3d_cs_set_prog_data(struct v3d_compile *c,
                      struct v3d_compute_prog_data *prog_data)
 {
-        prog_data->shared_size = c->s->info.cs.shared_size;
+        prog_data->shared_size = c->s->info.shared_size;

         prog_data->local_size[0] = c->s->info.cs.local_size[0];
         prog_data->local_size[1] = c->s->info.cs.local_size[1];
@@ -2288,7 +2288,7 @@ lower_vars_to_explicit(nir_shader *shader,
       shader->scratch_size = offset;
       break;
    case nir_var_mem_shared:
-      shader->info.cs.shared_size = offset;
+      shader->info.shared_size = offset;
       shader->shared_size = offset;
       break;
    case nir_var_mem_constant:
@@ -1629,7 +1629,7 @@ nir_print_shader_annotated(nir_shader *shader, FILE *fp,
               shader->info.cs.local_size[1],
               shader->info.cs.local_size[2],
               shader->info.cs.local_size_variable ? " (variable)" : "");
-      fprintf(fp, "shared-size: %u\n", shader->info.cs.shared_size);
+      fprintf(fp, "shared-size: %u\n", shader->info.shared_size);
    }

    fprintf(fp, "inputs: %u\n", shader->num_inputs);
@@ -183,6 +183,11 @@ typedef struct shader_info {
    /* SPV_KHR_float_controls: execution mode for floating point ops */
    uint16_t float_controls_execution_mode;

+   /**
+    * Size of shared variables accessed by compute/task/mesh shaders.
+    */
+   unsigned shared_size;
+
    uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
    uint8_t num_inlinable_uniforms:4;

@@ -377,11 +382,6 @@

       bool zero_initialize_shared_memory;

-      /**
-       * Size of shared variables accessed by the compute shader.
-       */
-      unsigned shared_size;
-
       /**
        * pointer size is:
        * AddressingModelLogical: 0 (default)
@@ -6066,7 +6066,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
          const bool align_to_stride = false;
          size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
       }
-      b->shader->info.cs.shared_size = size;
+      b->shader->info.shared_size = size;
       b->shader->shared_size = size;
    }

@@ -2360,7 +2360,7 @@ ureg_setup_compute_shader(struct ureg_program *ureg,
    ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
                  info->cs.local_size[2]);

-   if (info->cs.shared_size)
+   if (info->shared_size)
       ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
 }

@@ -456,10 +456,10 @@ llvmpipe_create_compute_state(struct pipe_context *pipe,
       shader->base.type = PIPE_SHADER_IR_NIR;

       pipe->screen->finalize_nir(pipe->screen, shader->base.ir.nir, false);
-      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.cs.shared_size;
+      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
    } else if (templ->ir_type == PIPE_SHADER_IR_NIR) {
       shader->base.ir.nir = (struct nir_shader *)templ->prog;
-      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.cs.shared_size;
+      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
    }
    if (shader->base.type == PIPE_SHADER_IR_TGSI) {
       /* get/save the summary info for this shader */
@@ -1292,7 +1292,7 @@ Converter::parseNIR()
       info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
       info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
       info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
-      info_out->bin.smemSize += nir->info.cs.shared_size;
+      info_out->bin.smemSize += nir->info.shared_size;
       break;
    case Program::TYPE_FRAGMENT:
       info_out->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
@@ -237,7 +237,7 @@ static void *si_create_compute_state(struct pipe_context *ctx, const struct pipe
       si_const_and_shader_buffer_descriptors_idx(PIPE_SHADER_COMPUTE);
    sel->sampler_and_images_descriptors_index =
       si_sampler_and_image_descriptors_idx(PIPE_SHADER_COMPUTE);
-   sel->info.base.cs.shared_size = cso->req_local_mem;
+   sel->info.base.shared_size = cso->req_local_mem;
    program->shader.selector = &program->sel;
    program->ir_type = cso->ir_type;
    program->private_size = cso->req_private_mem;
@@ -481,9 +481,9 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
     * tracker, then we will set LDS_SIZE to 512 bytes rather than 256.
     */
    if (sctx->chip_class <= GFX6) {
-      lds_blocks += align(program->sel.info.base.cs.shared_size, 256) >> 8;
+      lds_blocks += align(program->sel.info.base.shared_size, 256) >> 8;
    } else {
-      lds_blocks += align(program->sel.info.base.cs.shared_size, 512) >> 9;
+      lds_blocks += align(program->sel.info.base.shared_size, 512) >> 9;
    }

    /* TODO: use si_multiwave_lds_size_workaround */
@@ -625,7 +625,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_
    dispatch.grid_size_z = util_cpu_to_le32(info->grid[2] * info->block[2]);

    dispatch.private_segment_size = util_cpu_to_le32(program->private_size);
-   dispatch.group_segment_size = util_cpu_to_le32(program->sel.info.base.cs.shared_size);
+   dispatch.group_segment_size = util_cpu_to_le32(program->sel.info.base.shared_size);

    dispatch.kernarg_address = util_cpu_to_le64(kernel_args_va);

@@ -426,7 +426,7 @@ static LLVMValueRef si_llvm_get_block_size(struct ac_shader_abi *abi)
 static void si_llvm_declare_compute_memory(struct si_shader_context *ctx)
 {
    struct si_shader_selector *sel = ctx->shader->selector;
-   unsigned lds_size = sel->info.base.cs.shared_size;
+   unsigned lds_size = sel->info.base.shared_size;

    LLVMTypeRef i8p = LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS);
    LLVMValueRef var;
@@ -487,7 +487,7 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *
                        nir->info.cs.user_data_components_amd);
    }

-   if (ctx->shader->selector->info.base.cs.shared_size)
+   if (ctx->shader->selector->info.base.shared_size)
       si_llvm_declare_compute_memory(ctx);
 }

@@ -3666,8 +3666,8 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
                                          s->info.gs.vertices_out);
       break;
    case MESA_SHADER_COMPUTE:
-      if (s->info.cs.shared_size)
-         create_shared_block(&ctx, s->info.cs.shared_size);
+      if (s->info.shared_size)
+         create_shared_block(&ctx, s->info.shared_size);

       if (s->info.cs.local_size[0] || s->info.cs.local_size[1] || s->info.cs.local_size[2])
          spirv_builder_emit_exec_mode_literal3(&ctx.builder, entry_point, SpvExecutionModeLocalSize,
@@ -674,7 +674,7 @@ lvp_pipeline_compile(struct lvp_pipeline *pipeline,
       struct pipe_compute_state shstate = {0};
       shstate.prog = (void *)pipeline->pipeline_nir[MESA_SHADER_COMPUTE];
       shstate.ir_type = PIPE_SHADER_IR_NIR;
-      shstate.req_local_mem = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.cs.shared_size;
+      shstate.req_local_mem = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.shared_size;
       pipeline->shader_cso[PIPE_SHADER_COMPUTE] = device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
    } else {
       struct pipe_shader_state shstate = {0};
@@ -9445,7 +9445,7 @@ brw_compile_cs(const struct brw_compiler *compiler,
    const bool debug_enabled = INTEL_DEBUG & DEBUG_CS;

    prog_data->base.stage = MESA_SHADER_COMPUTE;
-   prog_data->base.total_shared = nir->info.cs.shared_size;
+   prog_data->base.total_shared = nir->info.shared_size;

    /* Generate code for all the possible SIMD variants. */
    bool generate_all;
@@ -1717,15 +1717,15 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
              nir_var_mem_shared, nir_address_format_32bit_offset);

    if (stage.nir->info.cs.zero_initialize_shared_memory &&
-       stage.nir->info.cs.shared_size > 0) {
+       stage.nir->info.shared_size > 0) {
       /* The effective Shared Local Memory size is at least 1024 bytes and
        * is always rounded to a power of two, so it is OK to align the size
        * used by the shader to chunk_size -- which does simplify the logic.
        */
       const unsigned chunk_size = 16;
-      const unsigned shared_size = ALIGN(stage.nir->info.cs.shared_size, chunk_size);
+      const unsigned shared_size = ALIGN(stage.nir->info.shared_size, chunk_size);
       assert(shared_size <=
-             calculate_gen_slm_size(compiler->devinfo->ver, stage.nir->info.cs.shared_size));
+             calculate_gen_slm_size(compiler->devinfo->ver, stage.nir->info.shared_size));

       NIR_PASS_V(stage.nir, nir_zero_initialize_shared_memory,
                  shared_size, chunk_size);
@@ -92,7 +92,7 @@ brw_codegen_cs_prog(struct brw_context *brw,

    memset(&prog_data, 0, sizeof(prog_data));

-   if (cp->program.info.cs.shared_size > 64 * 1024) {
+   if (cp->program.info.shared_size > 64 * 1024) {
       cp->program.sh.data->LinkStatus = LINKING_FAILURE;
       const char *error_str =
          "Compute shader used more than 64KB of shared variables";
@@ -2623,7 +2623,7 @@ _mesa_copy_linked_program_data(const struct gl_shader_program *src,
       break;
    }
    case MESA_SHADER_COMPUTE: {
-      dst->info.cs.shared_size = src->Comp.SharedSize;
+      dst->info.shared_size = src->Comp.SharedSize;
       break;
    }
    default:
@@ -548,7 +548,7 @@ st_create_nir_shader(struct st_context *st, struct pipe_shader_state *state)
    case MESA_SHADER_COMPUTE: {
       struct pipe_compute_state cs = {0};
       cs.ir_type = state->type;
-      cs.req_local_mem = nir->info.cs.shared_size;
+      cs.req_local_mem = nir->info.shared_size;

       if (state->type == PIPE_SHADER_IR_NIR)
          cs.prog = state->ir.nir;
@@ -890,7 +890,7 @@ st_create_common_variant(struct st_context *st,
    case MESA_SHADER_COMPUTE: {
       struct pipe_compute_state cs = {0};
       cs.ir_type = state.type;
-      cs.req_local_mem = stp->Base.info.cs.shared_size;
+      cs.req_local_mem = stp->Base.info.shared_size;

       if (state.type == PIPE_SHADER_IR_NIR)
          cs.prog = state.ir.nir;
@@ -1401,12 +1401,12 @@ clc_to_dxil(struct clc_context *ctx,
           */
          unsigned alignment = size < 128 ? (1 << (ffs(size) - 1)) : 128;

-         nir->info.cs.shared_size = align(nir->info.cs.shared_size, alignment);
-         metadata->args[i].localptr.sharedmem_offset = nir->info.cs.shared_size;
-         nir->info.cs.shared_size += size;
+         nir->info.shared_size = align(nir->info.shared_size, alignment);
+         metadata->args[i].localptr.sharedmem_offset = nir->info.shared_size;
+         nir->info.shared_size += size;
       }

-   metadata->local_mem_size = nir->info.cs.shared_size;
+   metadata->local_mem_size = nir->info.shared_size;
    metadata->priv_mem_size = nir->scratch_size;

    /* DXIL double math is too limited compared to what NIR expects. Let's refuse
@@ -4039,7 +4039,7 @@ emit_module(struct ntd_context *ctx, nir_shader *s, const struct nir_to_dxil_opt
       }
    }

-   if (s->info.cs.shared_size && shader_has_shared_ops(s)) {
+   if (s->info.shared_size && shader_has_shared_ops(s)) {
       const struct dxil_type *type;
       unsigned size;

@@ -4053,7 +4053,7 @@ emit_module(struct ntd_context *ctx, nir_shader *s, const struct nir_to_dxil_opt
        * pointer is in the groupshared address space, making the 32-bit -> 64-bit
        * pointer cast impossible.
        */
-      size = ALIGN_POT(s->info.cs.shared_size, sizeof(uint32_t));
+      size = ALIGN_POT(s->info.shared_size, sizeof(uint32_t));
       type = dxil_module_get_array_type(&ctx->mod,
                                         dxil_module_get_int_type(&ctx->mod, 32),
                                         size / sizeof(uint32_t));
@@ -217,7 +217,7 @@ pan_shader_compile(const struct panfrost_device *dev,
                             &info->varyings.input_count);
       break;
    case MESA_SHADER_COMPUTE:
-      info->wls_size = s->info.cs.shared_size;
+      info->wls_size = s->info.shared_size;
       break;
    default:
       unreachable("Unknown shader state");