radv: Replace supports_spill with explicit_scratch_args

The former was always true and hence dead code. With ACO we will want
to explicitly declare the ring-offsets register, but we also want to
declare the scratch offset, and we can't simply disable it: ACO
supports spilling as well, and whether spilling is needed is only
determined well after the arguments are set up. So replace
supports_spill with something that will actually be used for ACO.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Author: Connor Abbott
Date:   2019-11-12 15:38:46 +01:00
parent 4d6676d78a
commit e7f4cadd02

6 changed files with 49 additions and 54 deletions
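For orientation, a minimal sketch of the pattern this commit establishes (the helper name below is hypothetical, not code from this commit; the real call sites are radv_declare_shader_args() and ACO's add_startpgm() in the diff): the scratch-offset SGPR is declared as an explicit argument only when the backend asks for it, and ACO always asks.

/* Hypothetical sketch, assuming the usual radv/ac argument helpers are in scope. */
static void
declare_scratch_offset_arg(struct radv_shader_args *args)
{
   if (args->options->explicit_scratch_args) {
      /* ACO consumes the scratch offset from this SGPR; whether it actually
       * spills is only known long after the arguments are declared. */
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
   }
}

The LLVM path, by contrast, keeps loading the ring offsets through the llvm.amdgcn.implicit.buffer.ptr intrinsic and so never sets explicit_scratch_args.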

View File

@@ -95,9 +95,6 @@ struct isel_context {
       bool exec_potentially_empty = false;
    } cf_info;
-   /* scratch */
-   bool scratch_enabled = false;
    /* inputs common for merged stages */
    Temp merged_wave_info = Temp(0, s1);
@@ -639,8 +636,7 @@ static void allocate_user_sgprs(isel_context *ctx,
       user_sgpr_info.need_ring_offsets = true;
    /* 2 user sgprs will nearly always be allocated for scratch/rings */
-   if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled)
-      user_sgpr_count += 2;
+   user_sgpr_count += 2;
    switch (ctx->stage) {
    case vertex_vs:
@@ -895,10 +891,8 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
    arg_info args = {};
    /* this needs to be in sgprs 0 and 1 */
-   if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled) {
-      add_arg(&args, s2, &ctx->program->private_segment_buffer, 0);
-      set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx);
-   }
+   add_arg(&args, s2, &ctx->program->private_segment_buffer, 0);
+   set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx);
    unsigned vgpr_idx = 0;
    switch (ctx->stage) {
@@ -928,8 +922,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
       else
          declare_streamout_sgprs(ctx, &args, &idx);
-      if (ctx->options->supports_spill || ctx->scratch_enabled)
-         add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
+      add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
       declare_vs_input_vgprs(ctx, &args);
       break;
@@ -940,8 +933,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
       assert(user_sgpr_info.user_sgpr_idx == user_sgpr_info.num_sgpr);
       add_arg(&args, s1, &ctx->prim_mask, user_sgpr_info.user_sgpr_idx);
-      if (ctx->options->supports_spill || ctx->scratch_enabled)
-         add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1);
+      add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1);
       ctx->program->config->spi_ps_input_addr = 0;
       ctx->program->config->spi_ps_input_ena = 0;
@@ -1004,8 +996,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
       if (ctx->program->info->cs.uses_local_invocation_idx)
          add_arg(&args, s1, &ctx->tg_size, idx++);
-      if (ctx->options->supports_spill || ctx->scratch_enabled)
-         add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
+      add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
       add_arg(&args, v3, &ctx->local_invocation_ids, vgpr_idx++);
       break;
@@ -1357,7 +1348,6 @@ setup_isel_context(Program* program,
    unsigned scratch_size = 0;
    for (unsigned i = 0; i < shader_count; i++)
       scratch_size = std::max(scratch_size, shaders[i]->scratch_size);
-   ctx.scratch_enabled = scratch_size > 0;
    ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.program->wave_size, 1024);
    ctx.block = ctx.program->create_and_insert_block();

View File

@@ -411,15 +411,11 @@ static void create_function(struct radv_shader_context *ctx,
                               ctx->max_workgroup_size,
                               ctx->args->options);
-   if (ctx->args->options->supports_spill) {
-      ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
-                                             LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST),
-                                             NULL, 0, AC_FUNC_ATTR_READNONE);
-      ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
-                                           ac_array_in_const_addr_space(ctx->ac.v4i32), "");
-   } else if (ctx->args->ring_offsets.used) {
-      ctx->ring_offsets = ac_get_arg(&ctx->ac, ctx->args->ring_offsets);
-   }
+   ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
+                                          LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST),
+                                          NULL, 0, AC_FUNC_ATTR_READNONE);
+   ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
+                                        ac_array_in_const_addr_space(ctx->ac.v4i32), "");
    load_descriptor_sets(ctx);

View File

@@ -1118,8 +1118,7 @@ shader_variant_compile(struct radv_device *device,
    struct ac_llvm_compiler ac_llvm;
    bool thread_compiler;
-   if (options->supports_spill)
-      tm_options |= AC_TM_SUPPORTS_SPILL;
+   tm_options |= AC_TM_SUPPORTS_SPILL;
    if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
       tm_options |= AC_TM_SISCHED;
    if (options->check_ir)
@@ -1200,7 +1199,7 @@ radv_shader_variant_compile(struct radv_device *device,
    if (key)
       options.key = *key;
-   options.supports_spill = true;
+   options.explicit_scratch_args = use_aco;
    options.robust_buffer_access = device->robust_buffer_access;
    return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, info,

View File

@@ -125,7 +125,7 @@ struct radv_shader_variant_key {
 struct radv_nir_compiler_options {
    struct radv_pipeline_layout *layout;
    struct radv_shader_variant_key key;
-   bool supports_spill;
+   bool explicit_scratch_args;
    bool clamp_shadow_reference;
    bool robust_buffer_access;
    bool dump_shader;

View File

@@ -72,7 +72,6 @@ set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
 }
 struct user_sgpr_info {
-   bool need_ring_offsets;
    bool indirect_all_descriptor_sets;
    uint8_t remaining_sgprs;
 };
@@ -168,22 +167,8 @@ static void allocate_user_sgprs(struct radv_shader_args *args,
    memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
-   /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */
-   if (stage == MESA_SHADER_GEOMETRY ||
-       stage == MESA_SHADER_VERTEX ||
-       stage == MESA_SHADER_TESS_CTRL ||
-       stage == MESA_SHADER_TESS_EVAL ||
-       args->is_gs_copy_shader)
-      user_sgpr_info->need_ring_offsets = true;
-   if (stage == MESA_SHADER_FRAGMENT &&
-       args->shader_info->ps.needs_sample_positions)
-      user_sgpr_info->need_ring_offsets = true;
-   /* 2 user sgprs will nearly always be allocated for scratch/rings */
-   if (args->options->supports_spill || user_sgpr_info->need_ring_offsets) {
-      user_sgpr_count += 2;
-   }
+   /* 2 user sgprs will always be allocated for scratch/rings */
+   user_sgpr_count += 2;
    switch (stage) {
    case MESA_SHADER_COMPUTE:
@@ -464,7 +449,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
    allocate_user_sgprs(args, stage, has_previous_stage,
                        previous_stage, needs_view_index, &user_sgpr_info);
-   if (user_sgpr_info.need_ring_offsets && !args->options->supports_spill) {
+   if (args->options->explicit_scratch_args) {
       ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR,
                  &args->ring_offsets);
    }
@@ -490,6 +475,11 @@ radv_declare_shader_args(struct radv_shader_args *args,
                    &args->ac.tg_size);
       }
+      if (args->options->explicit_scratch_args) {
+         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+                    &args->scratch_offset);
+      }
       ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT,
                  &args->ac.local_invocation_ids);
       break;
@@ -513,6 +503,11 @@ radv_declare_shader_args(struct radv_shader_args *args,
          declare_streamout_sgprs(args, stage);
       }
+      if (args->options->explicit_scratch_args) {
+         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+                    &args->scratch_offset);
+      }
       declare_vs_input_vgprs(args);
       break;
    case MESA_SHADER_TESS_CTRL:
@@ -524,7 +519,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
                  &args->tess_factor_offset);
-      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
+      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
@@ -556,6 +551,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
                  &args->tess_factor_offset);
+      if (args->options->explicit_scratch_args) {
+         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+                    &args->scratch_offset);
+      }
       ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
                  &args->ac.tcs_patch_id);
       ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
@@ -578,6 +577,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
          declare_streamout_sgprs(args, stage);
          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
       }
+      if (args->options->explicit_scratch_args) {
+         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+                    &args->scratch_offset);
+      }
       declare_tes_input_vgprs(args);
       break;
    case MESA_SHADER_GEOMETRY:
@@ -595,7 +598,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
                 &args->merged_wave_info);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
-      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
+      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
@@ -638,6 +641,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id);
+      if (args->options->explicit_scratch_args) {
+         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+                    &args->scratch_offset);
+      }
       ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
                  &args->gs_vtx_offset[0]);
       ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
@@ -660,6 +667,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
       declare_global_input_sgprs(args, &user_sgpr_info);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
+      if (args->options->explicit_scratch_args) {
+         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+                    &args->scratch_offset);
+      }
       ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
       ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
       ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
@@ -682,7 +693,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
    }
    args->shader_info->num_input_vgprs = 0;
-   args->shader_info->num_input_sgprs = args->options->supports_spill ? 2 : 0;
+   args->shader_info->num_input_sgprs = 2;
    args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
    if (stage != MESA_SHADER_FRAGMENT)
@@ -690,10 +701,8 @@ radv_declare_shader_args(struct radv_shader_args *args,
    uint8_t user_sgpr_idx = 0;
-   if (args->options->supports_spill || user_sgpr_info.need_ring_offsets) {
-      set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
-                         &user_sgpr_idx);
-   }
+   set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
+                      &user_sgpr_idx);
    /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
     * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */

View File

@@ -34,6 +34,7 @@ struct radv_shader_args {
    struct ac_arg descriptor_sets[MAX_SETS];
    struct ac_arg ring_offsets;
+   struct ac_arg scratch_offset;
    struct ac_arg vertex_buffers;
    struct ac_arg rel_auto_id;