radv: Move argument declaration out of nir_to_llvm

Now it's executed for ACO too.
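In practice, the radv_shader_args setup that nir_to_llvm used to do internally is now done once by the caller and handed to whichever backend runs. A rough sketch of the resulting flow, condensed from the radv_shader.c hunk below (the ACO entry point is a hypothetical name, used only for illustration):

    struct radv_shader_args args = {};
    args.options = options;
    args.shader_info = info;
    args.is_gs_copy_shader = gs_copy_shader;

    /* Declared once, independent of the backend. */
    radv_declare_shader_args(&args, stage, has_previous_stage, previous_stage);

    if (use_aco)
            aco_compile_shaders(shader_count, shaders, &binary, &args); /* hypothetical */
    else
            radv_compile_nir_shader(&ac_llvm, &binary, &args, shaders, shader_count);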

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Author: Connor Abbott
Date: 2019-11-11 18:05:03 +01:00
parent 3b143369a5
commit 66c703b3e8
6 changed files with 823 additions and 784 deletions

src/amd/vulkan/meson.build

@@ -101,6 +101,7 @@ libradv_files = files(
'radv_radeon_winsys.h',
'radv_shader.c',
'radv_shader.h',
+'radv_shader_args.c',
'radv_shader_args.h',
'radv_shader_helper.h',
'radv_shader_info.c',

src/amd/vulkan/radv_nir_to_llvm.c

@@ -318,7 +318,7 @@ get_tcs_out_current_patch_data_offset(struct radv_shader_context *ctx)
static LLVMValueRef
create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module,
LLVMBuilderRef builder,
-struct ac_shader_args *args,
+const struct ac_shader_args *args,
enum ac_llvm_calling_convention convention,
unsigned max_workgroup_size,
const struct radv_nir_compiler_options *options)
@@ -337,385 +337,6 @@ create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module,
return main_function;
}
static void
set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx,
uint8_t num_sgprs)
{
ud_info->sgpr_idx = *sgpr_idx;
ud_info->num_sgprs = num_sgprs;
*sgpr_idx += num_sgprs;
}
static void
set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx,
uint8_t num_sgprs)
{
struct radv_userdata_info *ud_info =
&args->shader_info->user_sgprs_locs.shader_data[idx];
assert(ud_info);
set_loc(ud_info, sgpr_idx, num_sgprs);
}
static void
set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
{
bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
}
static void
set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
{
struct radv_userdata_locations *locs =
&args->shader_info->user_sgprs_locs;
struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
assert(ud_info);
set_loc(ud_info, sgpr_idx, 1);
locs->descriptor_sets_enabled |= 1 << idx;
}
struct user_sgpr_info {
bool need_ring_offsets;
bool indirect_all_descriptor_sets;
uint8_t remaining_sgprs;
};
static bool needs_view_index_sgpr(struct radv_shader_args *args,
gl_shader_stage stage)
{
switch (stage) {
case MESA_SHADER_VERTEX:
if (args->shader_info->needs_multiview_view_index ||
(!args->options->key.vs_common_out.as_es && !args->options->key.vs_common_out.as_ls && args->options->key.has_multiview_view_index))
return true;
break;
case MESA_SHADER_TESS_EVAL:
if (args->shader_info->needs_multiview_view_index || (!args->options->key.vs_common_out.as_es && args->options->key.has_multiview_view_index))
return true;
break;
case MESA_SHADER_GEOMETRY:
case MESA_SHADER_TESS_CTRL:
if (args->shader_info->needs_multiview_view_index)
return true;
break;
default:
break;
}
return false;
}
static uint8_t
count_vs_user_sgprs(struct radv_shader_args *args)
{
uint8_t count = 0;
if (args->shader_info->vs.has_vertex_buffers)
count++;
count += args->shader_info->vs.needs_draw_id ? 3 : 2;
return count;
}
static void allocate_inline_push_consts(struct radv_shader_args *args,
struct user_sgpr_info *user_sgpr_info)
{
uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
/* Only supported if shaders use push constants. */
if (args->shader_info->min_push_constant_used == UINT8_MAX)
return;
/* Only supported if shaders don't have indirect push constants. */
if (args->shader_info->has_indirect_push_constants)
return;
/* Only supported for 32-bit push constants. */
if (!args->shader_info->has_only_32bit_push_constants)
return;
uint8_t num_push_consts =
(args->shader_info->max_push_constant_used -
args->shader_info->min_push_constant_used) / 4;
/* Check if the number of user SGPRs is large enough. */
if (num_push_consts < remaining_sgprs) {
args->shader_info->num_inline_push_consts = num_push_consts;
} else {
args->shader_info->num_inline_push_consts = remaining_sgprs;
}
/* Clamp to the maximum number of allowed inlined push constants. */
if (args->shader_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
args->shader_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
if (args->shader_info->num_inline_push_consts == num_push_consts &&
!args->shader_info->loads_dynamic_offsets) {
/* Disable the default push constants path if all constants are
* inlined and if shaders don't use dynamic descriptors.
*/
args->shader_info->loads_push_constants = false;
}
args->shader_info->base_inline_push_consts =
args->shader_info->min_push_constant_used / 4;
}
static void allocate_user_sgprs(struct radv_shader_args *args,
gl_shader_stage stage,
bool has_previous_stage,
gl_shader_stage previous_stage,
bool needs_view_index,
struct user_sgpr_info *user_sgpr_info)
{
uint8_t user_sgpr_count = 0;
memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
/* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */
if (stage == MESA_SHADER_GEOMETRY ||
stage == MESA_SHADER_VERTEX ||
stage == MESA_SHADER_TESS_CTRL ||
stage == MESA_SHADER_TESS_EVAL ||
args->is_gs_copy_shader)
user_sgpr_info->need_ring_offsets = true;
if (stage == MESA_SHADER_FRAGMENT &&
args->shader_info->ps.needs_sample_positions)
user_sgpr_info->need_ring_offsets = true;
/* 2 user sgprs will nearly always be allocated for scratch/rings */
if (args->options->supports_spill || user_sgpr_info->need_ring_offsets) {
user_sgpr_count += 2;
}
switch (stage) {
case MESA_SHADER_COMPUTE:
if (args->shader_info->cs.uses_grid_size)
user_sgpr_count += 3;
break;
case MESA_SHADER_FRAGMENT:
user_sgpr_count += args->shader_info->ps.needs_sample_positions;
break;
case MESA_SHADER_VERTEX:
if (!args->is_gs_copy_shader)
user_sgpr_count += count_vs_user_sgprs(args);
break;
case MESA_SHADER_TESS_CTRL:
if (has_previous_stage) {
if (previous_stage == MESA_SHADER_VERTEX)
user_sgpr_count += count_vs_user_sgprs(args);
}
break;
case MESA_SHADER_TESS_EVAL:
break;
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
if (previous_stage == MESA_SHADER_VERTEX) {
user_sgpr_count += count_vs_user_sgprs(args);
}
}
break;
default:
break;
}
if (needs_view_index)
user_sgpr_count++;
if (args->shader_info->loads_push_constants)
user_sgpr_count++;
if (args->shader_info->so.num_outputs)
user_sgpr_count++;
uint32_t available_sgprs = args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
uint32_t num_desc_set =
util_bitcount(args->shader_info->desc_set_used_mask);
if (remaining_sgprs < num_desc_set) {
user_sgpr_info->indirect_all_descriptor_sets = true;
user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
} else {
user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
}
allocate_inline_push_consts(args, user_sgpr_info);
}
static void
declare_global_input_sgprs(struct radv_shader_args *args,
const struct user_sgpr_info *user_sgpr_info)
{
/* 1 for each descriptor set */
if (!user_sgpr_info->indirect_all_descriptor_sets) {
uint32_t mask = args->shader_info->desc_set_used_mask;
while (mask) {
int i = u_bit_scan(&mask);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR,
&args->descriptor_sets[i]);
}
} else {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR,
&args->descriptor_sets[0]);
}
if (args->shader_info->loads_push_constants) {
/* 1 for push constants and dynamic descriptors */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR,
&args->ac.push_constants);
}
for (unsigned i = 0; i < args->shader_info->num_inline_push_consts; i++) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.inline_push_consts[i]);
}
args->ac.num_inline_push_consts = args->shader_info->num_inline_push_consts;
args->ac.base_inline_push_consts = args->shader_info->base_inline_push_consts;
if (args->shader_info->so.num_outputs) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
&args->streamout_buffers);
}
}
static void
declare_vs_specific_input_sgprs(struct radv_shader_args *args,
gl_shader_stage stage,
bool has_previous_stage,
gl_shader_stage previous_stage)
{
if (!args->is_gs_copy_shader &&
(stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (args->shader_info->vs.has_vertex_buffers) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
&args->vertex_buffers);
}
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
if (args->shader_info->vs.needs_draw_id) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
}
}
}
static void
declare_vs_input_vgprs(struct radv_shader_args *args)
{
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
if (!args->is_gs_copy_shader) {
if (args->options->key.vs_common_out.as_ls) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->rel_auto_id);
if (args->options->chip_class >= GFX10) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
} else {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
}
} else {
if (args->options->chip_class >= GFX10) {
if (args->options->key.vs_common_out.as_ngg) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
} else {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
}
} else {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
}
}
}
}
static void
declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
{
int i;
if (args->options->use_ngg_streamout) {
if (stage == MESA_SHADER_TESS_EVAL)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
return;
}
/* Streamout SGPRs. */
if (args->shader_info->so.num_outputs) {
assert(stage == MESA_SHADER_VERTEX ||
stage == MESA_SHADER_TESS_EVAL);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_config);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_write_idx);
} else if (stage == MESA_SHADER_TESS_EVAL) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
}
/* A streamout buffer offset is loaded if the stride is non-zero. */
for (i = 0; i < 4; i++) {
if (!args->shader_info->so.strides[i])
continue;
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_offset[i]);
}
}
static void
declare_tes_input_vgprs(struct radv_shader_args *args)
{
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_u);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_v);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->tes_rel_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
}
static void
set_global_input_locs(struct radv_shader_args *args,
const struct user_sgpr_info *user_sgpr_info,
uint8_t *user_sgpr_idx)
{
uint32_t mask = args->shader_info->desc_set_used_mask;
if (!user_sgpr_info->indirect_all_descriptor_sets) {
while (mask) {
int i = u_bit_scan(&mask);
set_loc_desc(args, i, user_sgpr_idx);
}
} else {
set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS,
user_sgpr_idx);
args->shader_info->need_indirect_descriptor_sets = true;
}
if (args->shader_info->loads_push_constants) {
set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
}
if (args->shader_info->num_inline_push_consts) {
set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
args->shader_info->num_inline_push_consts);
}
if (args->streamout_buffers.used) {
set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS,
user_sgpr_idx);
}
}
static void
load_descriptor_sets(struct radv_shader_context *ctx)
{
@@ -741,30 +362,6 @@ load_descriptor_sets(struct radv_shader_context *ctx)
}
}
static void
set_vs_specific_input_locs(struct radv_shader_args *args,
gl_shader_stage stage, bool has_previous_stage,
gl_shader_stage previous_stage,
uint8_t *user_sgpr_idx)
{
if (!args->is_gs_copy_shader &&
(stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (args->shader_info->vs.has_vertex_buffers) {
set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS,
user_sgpr_idx);
}
unsigned vs_num = 2;
if (args->shader_info->vs.needs_draw_id)
vs_num++;
set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE,
user_sgpr_idx, vs_num);
}
}
static enum ac_llvm_calling_convention
get_llvm_calling_convention(LLVMValueRef func, gl_shader_stage stage)
{
@@ -796,313 +393,6 @@ static bool is_pre_gs_stage(gl_shader_stage stage)
return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
}
static void declare_inputs(struct radv_shader_args *args,
gl_shader_stage stage,
bool has_previous_stage,
gl_shader_stage previous_stage)
{
struct user_sgpr_info user_sgpr_info;
bool needs_view_index = needs_view_index_sgpr(args, stage);
if (args->options->chip_class >= GFX10) {
if (is_pre_gs_stage(stage) && args->options->key.vs_common_out.as_ngg) {
/* On GFX10, VS is merged into GS for NGG. */
previous_stage = stage;
stage = MESA_SHADER_GEOMETRY;
has_previous_stage = true;
}
}
for (int i = 0; i < MAX_SETS; i++)
args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
for (int i = 0; i < AC_UD_MAX_UD; i++)
args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
allocate_user_sgprs(args, stage, has_previous_stage,
previous_stage, needs_view_index, &user_sgpr_info);
if (user_sgpr_info.need_ring_offsets && !args->options->supports_spill) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR,
&args->ring_offsets);
}
switch (stage) {
case MESA_SHADER_COMPUTE:
declare_global_input_sgprs(args, &user_sgpr_info);
if (args->shader_info->cs.uses_grid_size) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT,
&args->ac.num_work_groups);
}
for (int i = 0; i < 3; i++) {
if (args->shader_info->cs.uses_block_id[i]) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.workgroup_ids[i]);
}
}
if (args->shader_info->cs.uses_local_invocation_idx) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.tg_size);
}
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT,
&args->ac.local_invocation_ids);
break;
case MESA_SHADER_VERTEX:
declare_global_input_sgprs(args, &user_sgpr_info);
declare_vs_specific_input_sgprs(args, stage, has_previous_stage,
previous_stage);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.view_index);
}
if (args->options->key.vs_common_out.as_es) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->es2gs_offset);
} else if (args->options->key.vs_common_out.as_ls) {
/* no extra parameters */
} else {
declare_streamout_sgprs(args, stage);
}
declare_vs_input_vgprs(args);
break;
case MESA_SHADER_TESS_CTRL:
if (has_previous_stage) {
// First 6 system regs
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->merged_wave_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->tess_factor_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
declare_global_input_sgprs(args, &user_sgpr_info);
declare_vs_specific_input_sgprs(args, stage,
has_previous_stage,
previous_stage);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.view_index);
}
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.tcs_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.tcs_rel_ids);
declare_vs_input_vgprs(args);
} else {
declare_global_input_sgprs(args, &user_sgpr_info);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.view_index);
}
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->tess_factor_offset);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.tcs_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.tcs_rel_ids);
}
break;
case MESA_SHADER_TESS_EVAL:
declare_global_input_sgprs(args, &user_sgpr_info);
if (needs_view_index)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.view_index);
if (args->options->key.vs_common_out.as_es) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->es2gs_offset);
} else {
declare_streamout_sgprs(args, stage);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
}
declare_tes_input_vgprs(args);
break;
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
// First 6 system regs
if (args->options->key.vs_common_out.as_ngg) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->gs_tg_info);
} else {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->gs2vs_offset);
}
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->merged_wave_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
declare_global_input_sgprs(args, &user_sgpr_info);
if (previous_stage != MESA_SHADER_TESS_EVAL) {
declare_vs_specific_input_sgprs(args, stage,
has_previous_stage,
previous_stage);
}
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.view_index);
}
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[0]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[2]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.gs_prim_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.gs_invocation_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[4]);
if (previous_stage == MESA_SHADER_VERTEX) {
declare_vs_input_vgprs(args);
} else {
declare_tes_input_vgprs(args);
}
} else {
declare_global_input_sgprs(args, &user_sgpr_info);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.view_index);
}
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[0]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[1]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.gs_prim_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[2]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[3]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[4]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[5]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.gs_invocation_id);
}
break;
case MESA_SHADER_FRAGMENT:
declare_global_input_sgprs(args, &user_sgpr_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, NULL); /* persp pull model */
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
break;
default:
unreachable("Shader stage not implemented");
}
args->shader_info->num_input_vgprs = 0;
args->shader_info->num_input_sgprs = args->options->supports_spill ? 2 : 0;
args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
if (stage != MESA_SHADER_FRAGMENT)
args->shader_info->num_input_vgprs = args->ac.num_vgprs_used;
uint8_t user_sgpr_idx = 0;
if (args->options->supports_spill || user_sgpr_info.need_ring_offsets) {
set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
&user_sgpr_idx);
}
/* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
* the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart the count from 0. */
if (has_previous_stage)
user_sgpr_idx = 0;
set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);
switch (stage) {
case MESA_SHADER_COMPUTE:
if (args->shader_info->cs.uses_grid_size) {
set_loc_shader(args, AC_UD_CS_GRID_SIZE,
&user_sgpr_idx, 3);
}
break;
case MESA_SHADER_VERTEX:
set_vs_specific_input_locs(args, stage, has_previous_stage,
previous_stage, &user_sgpr_idx);
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_TESS_CTRL:
set_vs_specific_input_locs(args, stage, has_previous_stage,
previous_stage, &user_sgpr_idx);
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_TESS_EVAL:
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
if (previous_stage == MESA_SHADER_VERTEX)
set_vs_specific_input_locs(args, stage,
has_previous_stage,
previous_stage,
&user_sgpr_idx);
}
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_FRAGMENT:
break;
default:
unreachable("Shader stage not implemented");
}
args->shader_info->num_user_sgprs = user_sgpr_idx;
}
static void create_function(struct radv_shader_context *ctx,
gl_shader_stage stage,
bool has_previous_stage)
@@ -4764,39 +4054,33 @@ static
LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
struct nir_shader *const *shaders,
int shader_count,
-struct radv_shader_info *shader_info,
-const struct radv_nir_compiler_options *options)
+const struct radv_shader_args *args)
{
struct radv_shader_context ctx = {0};
-struct radv_shader_args args = {0};
-args.options = options;
-args.shader_info = shader_info;
-ctx.args = &args;
-declare_inputs(&args, shaders[shader_count - 1]->info.stage, shader_count >= 2,
-shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX);
+ctx.args = args;
enum ac_float_mode float_mode = AC_FLOAT_MODE_DEFAULT;
-if (shader_info->float_controls_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) {
+if (args->shader_info->float_controls_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) {
float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
}
-ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class,
-options->family, float_mode, shader_info->wave_size, 64);
+ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
+args->options->family, float_mode,
+args->shader_info->wave_size, 64);
ctx.context = ctx.ac.context;
ctx.max_workgroup_size = 0;
for (int i = 0; i < shader_count; ++i) {
ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
-radv_nir_get_max_workgroup_size(args.options->chip_class,
+radv_nir_get_max_workgroup_size(args->options->chip_class,
shaders[i]->info.stage,
shaders[i]));
}
if (ctx.ac.chip_class >= GFX10) {
if (is_pre_gs_stage(shaders[0]->info.stage) &&
-options->key.vs_common_out.as_ngg) {
+args->options->key.vs_common_out.as_ngg) {
ctx.max_workgroup_size = 128;
}
}
@@ -4811,20 +4095,20 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
ctx.abi.load_resource = radv_load_resource;
ctx.abi.clamp_shadow_reference = false;
-ctx.abi.robust_buffer_access = options->robust_buffer_access;
+ctx.abi.robust_buffer_access = args->options->robust_buffer_access;
-bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && args.options->key.vs_common_out.as_ngg;
+bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && args->options->key.vs_common_out.as_ngg;
if (shader_count >= 2 || is_ngg)
ac_init_exec_full_mask(&ctx.ac);
-if (args.ac.vertex_id.used)
-ctx.abi.vertex_id = ac_get_arg(&ctx.ac, args.ac.vertex_id);
-if (args.rel_auto_id.used)
-ctx.rel_auto_id = ac_get_arg(&ctx.ac, args.rel_auto_id);
-if (args.ac.instance_id.used)
-ctx.abi.instance_id = ac_get_arg(&ctx.ac, args.ac.instance_id);
+if (args->ac.vertex_id.used)
+ctx.abi.vertex_id = ac_get_arg(&ctx.ac, args->ac.vertex_id);
+if (args->rel_auto_id.used)
+ctx.rel_auto_id = ac_get_arg(&ctx.ac, args->rel_auto_id);
+if (args->ac.instance_id.used)
+ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id);
-if (options->has_ls_vgpr_init_bug &&
+if (args->options->has_ls_vgpr_init_bug &&
shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
ac_nir_fixup_ls_hs_input_vgprs(&ctx);
@@ -4858,7 +4142,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
ctx.gs_next_vertex[i] =
ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
}
-if (args.options->key.vs_common_out.as_ngg) {
+if (args->options->key.vs_common_out.as_ngg) {
for (unsigned i = 0; i < 4; ++i) {
ctx.gs_curprim_verts[i] =
ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
@@ -4867,7 +4151,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
}
unsigned scratch_size = 8;
-if (args.shader_info->so.num_outputs)
+if (args->shader_info->so.num_outputs)
scratch_size = 44;
LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, scratch_size);
@@ -4890,15 +4174,15 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
ctx.abi.store_tcs_outputs = store_tcs_output;
if (shader_count == 1)
-ctx.tcs_num_inputs = args.options->key.tcs.num_inputs;
+ctx.tcs_num_inputs = args->options->key.tcs.num_inputs;
else
-ctx.tcs_num_inputs = util_last_bit64(shader_info->vs.ls_outputs_written);
+ctx.tcs_num_inputs = util_last_bit64(args->shader_info->vs.ls_outputs_written);
ctx.tcs_num_patches = get_tcs_num_patches(&ctx);
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
ctx.abi.load_tess_varyings = load_tes_input;
ctx.abi.load_tess_coord = load_tess_coord;
ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
-ctx.tcs_num_patches = args.options->key.tes.num_patches;
+ctx.tcs_num_patches = args->options->key.tes.num_patches;
} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
ctx.abi.load_base_vertex = radv_load_base_vertex;
} else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
@@ -4908,8 +4192,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
}
if (shaders[i]->info.stage == MESA_SHADER_VERTEX &&
-args.options->key.vs_common_out.as_ngg &&
-args.options->key.vs_common_out.export_prim_id) {
+args->options->key.vs_common_out.as_ngg &&
+args->options->key.vs_common_out.export_prim_id) {
declare_esgs_ring(&ctx);
}
@@ -4917,7 +4201,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
if (i) {
if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY &&
-args.options->key.vs_common_out.as_ngg) {
+args->options->key.vs_common_out.as_ngg) {
gfx10_ngg_gs_emit_prologue(&ctx);
nested_barrier = false;
} else {
@@ -4959,7 +4243,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
LLVMValueRef count =
ac_unpack_param(&ctx.ac,
-ac_get_arg(&ctx.ac, args.merged_wave_info),
+ac_get_arg(&ctx.ac, args->merged_wave_info),
8 * i, 8);
LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT,
@@ -4976,7 +4260,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
else if(shaders[i]->info.stage == MESA_SHADER_GEOMETRY)
prepare_gs_input_vgprs(&ctx, shader_count >= 2);
-ac_nir_translate(&ctx.ac, &ctx.abi, &args.ac, shaders[i]);
+ac_nir_translate(&ctx.ac, &ctx.abi, &args->ac, shaders[i]);
if (shader_count >= 2 || is_ngg) {
LLVMBuildBr(ctx.ac.builder, merge_block);
@@ -4986,37 +4270,37 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
/* This needs to be outside the if wrapping the shader body, as sometimes
* the HW generates waves with 0 es/vs threads. */
if (is_pre_gs_stage(shaders[i]->info.stage) &&
-args.options->key.vs_common_out.as_ngg &&
+args->options->key.vs_common_out.as_ngg &&
i == shader_count - 1) {
handle_ngg_outputs_post_2(&ctx);
} else if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY &&
-args.options->key.vs_common_out.as_ngg) {
+args->options->key.vs_common_out.as_ngg) {
gfx10_ngg_gs_emit_epilogue_2(&ctx);
}
if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
-shader_info->tcs.num_patches = ctx.tcs_num_patches;
-shader_info->tcs.lds_size = calculate_tess_lds_size(&ctx);
+args->shader_info->tcs.num_patches = ctx.tcs_num_patches;
+args->shader_info->tcs.lds_size = calculate_tess_lds_size(&ctx);
}
}
LLVMBuildRetVoid(ctx.ac.builder);
-if (options->dump_preoptir) {
+if (args->options->dump_preoptir) {
fprintf(stderr, "%s LLVM IR:\n\n",
-radv_get_shader_name(shader_info,
+radv_get_shader_name(args->shader_info,
shaders[shader_count - 1]->info.stage));
ac_dump_module(ctx.ac.module);
fprintf(stderr, "\n");
}
-ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options);
+ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options);
if (shader_count == 1)
ac_nir_eliminate_const_vs_outputs(&ctx);
-if (options->dump_shader) {
-args.shader_info->private_mem_vgprs =
+if (args->options->dump_shader) {
+args->shader_info->private_mem_vgprs =
ac_count_scratch_private_memory(ctx.main_function);
}
@@ -5110,28 +4394,26 @@ static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm,
void
radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
struct radv_shader_binary **rbinary,
-struct radv_shader_info *shader_info,
+const struct radv_shader_args *args,
struct nir_shader *const *nir,
-int nir_count,
-const struct radv_nir_compiler_options *options)
+int nir_count)
{
LLVMModuleRef llvm_module;
-llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, shader_info,
-options);
+llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, args);
ac_compile_llvm_module(ac_llvm, llvm_module, rbinary,
nir[nir_count - 1]->info.stage,
-radv_get_shader_name(shader_info,
+radv_get_shader_name(args->shader_info,
nir[nir_count - 1]->info.stage),
-options);
+args->options);
/* Determine the ES type (VS or TES) for the GS on GFX9. */
-if (options->chip_class >= GFX9) {
+if (args->options->chip_class >= GFX9) {
if (nir_count == 2 &&
nir[1]->info.stage == MESA_SHADER_GEOMETRY) {
-shader_info->gs.es_type = nir[0]->info.stage;
+args->shader_info->gs.es_type = nir[0]->info.stage;
}
}
}
@@ -5239,20 +4521,15 @@ void
radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
struct nir_shader *geom_shader,
struct radv_shader_binary **rbinary,
-struct radv_shader_info *shader_info,
-const struct radv_nir_compiler_options *options)
+const struct radv_shader_args *args)
{
struct radv_shader_context ctx = {0};
-struct radv_shader_args args = {0};
-args.options = options;
-args.shader_info = shader_info;
-ctx.args = &args;
+ctx.args = args;
-args.is_gs_copy_shader = true;
-declare_inputs(&args, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX);
+assert(args->is_gs_copy_shader);
-ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class,
-options->family, AC_FLOAT_MODE_DEFAULT, 64, 64);
+ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
+args->options->family, AC_FLOAT_MODE_DEFAULT, 64, 64);
ctx.context = ctx.ac.context;
ctx.stage = MESA_SHADER_VERTEX;
@@ -5272,10 +4549,10 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
LLVMBuildRetVoid(ctx.ac.builder);
-ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options);
+ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options);
ac_compile_llvm_module(ac_llvm, ctx.ac.module, rbinary,
MESA_SHADER_VERTEX, "GS Copy Shader", options);
MESA_SHADER_VERTEX, "GS Copy Shader", args->options);
(*rbinary)->is_gs_copy_shader = true;
}

src/amd/vulkan/radv_private.h

@@ -2298,21 +2298,18 @@ struct radv_fence {
};
/* radv_nir_to_llvm.c */
-struct radv_shader_info;
-struct radv_nir_compiler_options;
+struct radv_shader_args;
void radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
struct nir_shader *geom_shader,
struct radv_shader_binary **rbinary,
-struct radv_shader_info *info,
-const struct radv_nir_compiler_options *option);
+const struct radv_shader_args *args);
void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
struct radv_shader_binary **rbinary,
-struct radv_shader_info *info,
+const struct radv_shader_args *args,
struct nir_shader *const *nir,
-int nir_count,
-const struct radv_nir_compiler_options *options);
+int nir_count);
unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
gl_shader_stage stage,

src/amd/vulkan/radv_shader.c

@@ -31,6 +31,7 @@
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_shader_helper.h"
#include "radv_shader_args.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "spirv/nir_spirv.h"
@@ -1095,6 +1096,17 @@ shader_variant_compile(struct radv_device *device,
options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
options->use_ngg_streamout = device->physical_device->use_ngg_streamout;
+struct radv_shader_args args = {};
+args.options = options;
+args.shader_info = info;
+args.is_gs_copy_shader = gs_copy_shader;
+radv_declare_shader_args(&args,
+gs_copy_shader ? MESA_SHADER_VERTEX
+: shaders[shader_count - 1]->info.stage,
+shader_count >= 2,
+shader_count >= 2 ? shaders[shader_count - 2]->info.stage
+: MESA_SHADER_VERTEX);
if (!use_aco || options->dump_shader || options->record_ir)
ac_init_llvm_once();
@@ -1124,10 +1136,10 @@ shader_variant_compile(struct radv_device *device,
if (gs_copy_shader) {
assert(shader_count == 1);
radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
-info, options);
+&args);
} else {
-radv_compile_nir_shader(&ac_llvm, &binary, info,
-shaders, shader_count, options);
+radv_compile_nir_shader(&ac_llvm, &binary, &args,
+shaders, shader_count);
}
binary->info = *info;

src/amd/vulkan/radv_shader_args.c (new file)

@@ -0,0 +1,747 @@
/*
* Copyright © 2019 Valve Corporation.
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
*
* based in part on anv driver which is:
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_shader_args.h"
static void
set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx,
uint8_t num_sgprs)
{
ud_info->sgpr_idx = *sgpr_idx;
ud_info->num_sgprs = num_sgprs;
*sgpr_idx += num_sgprs;
}
static void
set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx,
uint8_t num_sgprs)
{
struct radv_userdata_info *ud_info =
&args->shader_info->user_sgprs_locs.shader_data[idx];
assert(ud_info);
set_loc(ud_info, sgpr_idx, num_sgprs);
}
static void
set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
{
bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
}
static void
set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
{
struct radv_userdata_locations *locs =
&args->shader_info->user_sgprs_locs;
struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
assert(ud_info);
set_loc(ud_info, sgpr_idx, 1);
locs->descriptor_sets_enabled |= 1 << idx;
}
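set_loc() is a simple bump allocator over user SGPR slots: it records where one piece of user data starts and how wide it is, then advances the cursor. A minimal illustration (hypothetical call sequence; the scratch/ring pointer is the only 64-bit entry, so it consumes two SGPRs while 32-bit pointers consume one):

    uint8_t idx = 0;
    set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &idx); /* sgpr_idx = 0, 2 SGPRs, idx -> 2 */
    set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, &idx);       /* sgpr_idx = 2, 1 SGPR,  idx -> 3 */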
struct user_sgpr_info {
bool need_ring_offsets;
bool indirect_all_descriptor_sets;
uint8_t remaining_sgprs;
};
static bool needs_view_index_sgpr(struct radv_shader_args *args,
gl_shader_stage stage)
{
switch (stage) {
case MESA_SHADER_VERTEX:
if (args->shader_info->needs_multiview_view_index ||
(!args->options->key.vs_common_out.as_es && !args->options->key.vs_common_out.as_ls && args->options->key.has_multiview_view_index))
return true;
break;
case MESA_SHADER_TESS_EVAL:
if (args->shader_info->needs_multiview_view_index || (!args->options->key.vs_common_out.as_es && args->options->key.has_multiview_view_index))
return true;
break;
case MESA_SHADER_GEOMETRY:
case MESA_SHADER_TESS_CTRL:
if (args->shader_info->needs_multiview_view_index)
return true;
break;
default:
break;
}
return false;
}
static uint8_t
count_vs_user_sgprs(struct radv_shader_args *args)
{
uint8_t count = 0;
if (args->shader_info->vs.has_vertex_buffers)
count++;
count += args->shader_info->vs.needs_draw_id ? 3 : 2;
return count;
}
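A worked count for an assumed vertex shader that reads vertex buffers and gl_DrawID:

    /* 1 SGPR  - vertex-buffer descriptor pointer
     * 3 SGPRs - base_vertex, start_instance, draw_id
     * => count_vs_user_sgprs() returns 4 (3 if draw_id is not needed) */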
static void allocate_inline_push_consts(struct radv_shader_args *args,
struct user_sgpr_info *user_sgpr_info)
{
uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
/* Only supported if shaders use push constants. */
if (args->shader_info->min_push_constant_used == UINT8_MAX)
return;
/* Only supported if shaders don't have indirect push constants. */
if (args->shader_info->has_indirect_push_constants)
return;
/* Only supported for 32-bit push constants. */
if (!args->shader_info->has_only_32bit_push_constants)
return;
uint8_t num_push_consts =
(args->shader_info->max_push_constant_used -
args->shader_info->min_push_constant_used) / 4;
/* Check if the number of user SGPRs is large enough. */
if (num_push_consts < remaining_sgprs) {
args->shader_info->num_inline_push_consts = num_push_consts;
} else {
args->shader_info->num_inline_push_consts = remaining_sgprs;
}
/* Clamp to the maximum number of allowed inlined push constants. */
if (args->shader_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
args->shader_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
if (args->shader_info->num_inline_push_consts == num_push_consts &&
!args->shader_info->loads_dynamic_offsets) {
/* Disable the default push constants path if all constants are
* inlined and if shaders don't use dynamic descriptors.
*/
args->shader_info->loads_push_constants = false;
}
args->shader_info->base_inline_push_consts =
args->shader_info->min_push_constant_used / 4;
}
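A worked example with assumed values min_push_constant_used = 0, max_push_constant_used = 16 and remaining_sgprs = 6:

    /* num_push_consts = (16 - 0) / 4 = 4, which fits in the 6 spare SGPRs,
     * so 4 dwords are inlined starting at base_inline_push_consts = 0 / 4 = 0.
     * Everything used is inlined, so loads_push_constants is cleared
     * (as long as no dynamic offsets are loaded). */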
static void allocate_user_sgprs(struct radv_shader_args *args,
gl_shader_stage stage,
bool has_previous_stage,
gl_shader_stage previous_stage,
bool needs_view_index,
struct user_sgpr_info *user_sgpr_info)
{
uint8_t user_sgpr_count = 0;
memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
/* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */
if (stage == MESA_SHADER_GEOMETRY ||
stage == MESA_SHADER_VERTEX ||
stage == MESA_SHADER_TESS_CTRL ||
stage == MESA_SHADER_TESS_EVAL ||
args->is_gs_copy_shader)
user_sgpr_info->need_ring_offsets = true;
if (stage == MESA_SHADER_FRAGMENT &&
args->shader_info->ps.needs_sample_positions)
user_sgpr_info->need_ring_offsets = true;
/* 2 user sgprs will nearly always be allocated for scratch/rings */
if (args->options->supports_spill || user_sgpr_info->need_ring_offsets) {
user_sgpr_count += 2;
}
switch (stage) {
case MESA_SHADER_COMPUTE:
if (args->shader_info->cs.uses_grid_size)
user_sgpr_count += 3;
break;
case MESA_SHADER_FRAGMENT:
user_sgpr_count += args->shader_info->ps.needs_sample_positions;
break;
case MESA_SHADER_VERTEX:
if (!args->is_gs_copy_shader)
user_sgpr_count += count_vs_user_sgprs(args);
break;
case MESA_SHADER_TESS_CTRL:
if (has_previous_stage) {
if (previous_stage == MESA_SHADER_VERTEX)
user_sgpr_count += count_vs_user_sgprs(args);
}
break;
case MESA_SHADER_TESS_EVAL:
break;
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
if (previous_stage == MESA_SHADER_VERTEX) {
user_sgpr_count += count_vs_user_sgprs(args);
}
}
break;
default:
break;
}
if (needs_view_index)
user_sgpr_count++;
if (args->shader_info->loads_push_constants)
user_sgpr_count++;
if (args->shader_info->so.num_outputs)
user_sgpr_count++;
uint32_t available_sgprs = args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
uint32_t num_desc_set =
util_bitcount(args->shader_info->desc_set_used_mask);
if (remaining_sgprs < num_desc_set) {
user_sgpr_info->indirect_all_descriptor_sets = true;
user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
} else {
user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
}
allocate_inline_push_consts(args, user_sgpr_info);
}
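A worked budget for an assumed GFX9+ vertex shader (non-compute, so 32 SGPRs available) that needs vertex buffers, draw id, push constants, a view index and three descriptor sets:

    /* user_sgpr_count = 2 (scratch/rings) + 4 (VS) + 1 (view index)
     *                 + 1 (push constants) = 8
     * remaining_sgprs = 32 - 8 = 24
     * 24 >= 3 descriptor sets, so the sets stay direct and
     * user_sgpr_info->remaining_sgprs = 24 - 3 = 21,
     * which allocate_inline_push_consts() can then draw from. */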
static void
declare_global_input_sgprs(struct radv_shader_args *args,
const struct user_sgpr_info *user_sgpr_info)
{
/* 1 for each descriptor set */
if (!user_sgpr_info->indirect_all_descriptor_sets) {
uint32_t mask = args->shader_info->desc_set_used_mask;
while (mask) {
int i = u_bit_scan(&mask);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR,
&args->descriptor_sets[i]);
}
} else {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR,
&args->descriptor_sets[0]);
}
if (args->shader_info->loads_push_constants) {
/* 1 for push constants and dynamic descriptors */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR,
&args->ac.push_constants);
}
for (unsigned i = 0; i < args->shader_info->num_inline_push_consts; i++) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.inline_push_consts[i]);
}
args->ac.num_inline_push_consts = args->shader_info->num_inline_push_consts;
args->ac.base_inline_push_consts = args->shader_info->base_inline_push_consts;
if (args->shader_info->so.num_outputs) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
&args->streamout_buffers);
}
}
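Every argument above goes through ac_add_arg(), which appends one entry to the backend-agnostic ac_shader_args table. Roughly (a paraphrased sketch, not the verbatim helper; field names may differ):

    /* Sketch: record the register file, running offset and size, and hand
     * the caller a stable handle (arg->arg_index) to fetch the value later. */
    unsigned offset = (regfile == AC_ARG_SGPR) ? info->num_sgprs_used
                                               : info->num_vgprs_used;
    info->args[info->arg_count].file = regfile;
    info->args[info->arg_count].offset = offset;
    info->args[info->arg_count].size = size;
    info->args[info->arg_count].type = type;
    if (regfile == AC_ARG_SGPR)
            info->num_sgprs_used += size;
    else
            info->num_vgprs_used += size;
    if (arg) {
            arg->used = true;
            arg->arg_index = info->arg_count;
    }
    info->arg_count++;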
static void
declare_vs_specific_input_sgprs(struct radv_shader_args *args,
gl_shader_stage stage,
bool has_previous_stage,
gl_shader_stage previous_stage)
{
if (!args->is_gs_copy_shader &&
(stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (args->shader_info->vs.has_vertex_buffers) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
&args->vertex_buffers);
}
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
if (args->shader_info->vs.needs_draw_id) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
}
}
}
static void
declare_vs_input_vgprs(struct radv_shader_args *args)
{
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
if (!args->is_gs_copy_shader) {
if (args->options->key.vs_common_out.as_ls) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->rel_auto_id);
if (args->options->chip_class >= GFX10) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
} else {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
}
} else {
if (args->options->chip_class >= GFX10) {
if (args->options->key.vs_common_out.as_ngg) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
} else {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
}
} else {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
}
}
}
}
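For reference, the branches above yield these fixed VS input VGPR layouts (derived from the code; <user>/<unused> are slots declared with a NULL handle):

    /* pre-GFX10, as_ls : vertex_id, rel_auto_id, instance_id, <unused>
     * GFX10,     as_ls : vertex_id, rel_auto_id, <user>,      instance_id
     * GFX10,     NGG   : vertex_id, <user>,      <user>,      instance_id
     * GFX10,     legacy: vertex_id, <unused>,    vs_prim_id,  instance_id
     * pre-GFX10, legacy: vertex_id, instance_id, vs_prim_id,  <unused> */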
static void
declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
{
int i;
if (args->options->use_ngg_streamout) {
if (stage == MESA_SHADER_TESS_EVAL)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
return;
}
/* Streamout SGPRs. */
if (args->shader_info->so.num_outputs) {
assert(stage == MESA_SHADER_VERTEX ||
stage == MESA_SHADER_TESS_EVAL);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_config);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_write_idx);
} else if (stage == MESA_SHADER_TESS_EVAL) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
}
/* A streamout buffer offset is loaded if the stride is non-zero. */
for (i = 0; i < 4; i++) {
if (!args->shader_info->so.strides[i])
continue;
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_offset[i]);
}
}
static void
declare_tes_input_vgprs(struct radv_shader_args *args)
{
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_u);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_v);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->tes_rel_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
}
static void
set_global_input_locs(struct radv_shader_args *args,
const struct user_sgpr_info *user_sgpr_info,
uint8_t *user_sgpr_idx)
{
uint32_t mask = args->shader_info->desc_set_used_mask;
if (!user_sgpr_info->indirect_all_descriptor_sets) {
while (mask) {
int i = u_bit_scan(&mask);
set_loc_desc(args, i, user_sgpr_idx);
}
} else {
set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS,
user_sgpr_idx);
args->shader_info->need_indirect_descriptor_sets = true;
}
if (args->shader_info->loads_push_constants) {
set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
}
if (args->shader_info->num_inline_push_consts) {
set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
args->shader_info->num_inline_push_consts);
}
if (args->streamout_buffers.used) {
set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS,
user_sgpr_idx);
}
}
static void
set_vs_specific_input_locs(struct radv_shader_args *args,
gl_shader_stage stage, bool has_previous_stage,
gl_shader_stage previous_stage,
uint8_t *user_sgpr_idx)
{
if (!args->is_gs_copy_shader &&
(stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (args->shader_info->vs.has_vertex_buffers) {
set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS,
user_sgpr_idx);
}
unsigned vs_num = 2;
if (args->shader_info->vs.needs_draw_id)
vs_num++;
set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE,
user_sgpr_idx, vs_num);
}
}
/* Returns whether the stage is a stage that can be directly before the GS */
static bool is_pre_gs_stage(gl_shader_stage stage)
{
return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
}
void
radv_declare_shader_args(struct radv_shader_args *args,
gl_shader_stage stage,
bool has_previous_stage,
gl_shader_stage previous_stage)
{
struct user_sgpr_info user_sgpr_info;
bool needs_view_index = needs_view_index_sgpr(args, stage);
if (args->options->chip_class >= GFX10) {
if (is_pre_gs_stage(stage) && args->options->key.vs_common_out.as_ngg) {
/* On GFX10, VS is merged into GS for NGG. */
previous_stage = stage;
stage = MESA_SHADER_GEOMETRY;
has_previous_stage = true;
}
}
for (int i = 0; i < MAX_SETS; i++)
args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
for (int i = 0; i < AC_UD_MAX_UD; i++)
args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
allocate_user_sgprs(args, stage, has_previous_stage,
previous_stage, needs_view_index, &user_sgpr_info);
if (user_sgpr_info.need_ring_offsets && !args->options->supports_spill) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR,
&args->ring_offsets);
}
switch (stage) {
case MESA_SHADER_COMPUTE:
declare_global_input_sgprs(args, &user_sgpr_info);
if (args->shader_info->cs.uses_grid_size) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT,
&args->ac.num_work_groups);
}
for (int i = 0; i < 3; i++) {
if (args->shader_info->cs.uses_block_id[i]) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.workgroup_ids[i]);
}
}
if (args->shader_info->cs.uses_local_invocation_idx) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.tg_size);
}
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT,
&args->ac.local_invocation_ids);
break;
case MESA_SHADER_VERTEX:
declare_global_input_sgprs(args, &user_sgpr_info);
declare_vs_specific_input_sgprs(args, stage, has_previous_stage,
previous_stage);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.view_index);
}
if (args->options->key.vs_common_out.as_es) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->es2gs_offset);
} else if (args->options->key.vs_common_out.as_ls) {
/* no extra parameters */
} else {
declare_streamout_sgprs(args, stage);
}
declare_vs_input_vgprs(args);
break;
case MESA_SHADER_TESS_CTRL:
if (has_previous_stage) {
// First 6 system regs
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->merged_wave_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->tess_factor_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
declare_global_input_sgprs(args, &user_sgpr_info);
declare_vs_specific_input_sgprs(args, stage,
has_previous_stage,
previous_stage);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.view_index);
}
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.tcs_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.tcs_rel_ids);
declare_vs_input_vgprs(args);
} else {
declare_global_input_sgprs(args, &user_sgpr_info);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.view_index);
}
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->tess_factor_offset);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.tcs_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.tcs_rel_ids);
}
break;
case MESA_SHADER_TESS_EVAL:
declare_global_input_sgprs(args, &user_sgpr_info);
if (needs_view_index)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.view_index);
if (args->options->key.vs_common_out.as_es) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->es2gs_offset);
} else {
declare_streamout_sgprs(args, stage);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
}
declare_tes_input_vgprs(args);
break;
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
// First 6 system regs
if (args->options->key.vs_common_out.as_ngg) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->gs_tg_info);
} else {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->gs2vs_offset);
}
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->merged_wave_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
declare_global_input_sgprs(args, &user_sgpr_info);
if (previous_stage != MESA_SHADER_TESS_EVAL) {
declare_vs_specific_input_sgprs(args, stage,
has_previous_stage,
previous_stage);
}
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.view_index);
}
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[0]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[2]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.gs_prim_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.gs_invocation_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[4]);
if (previous_stage == MESA_SHADER_VERTEX) {
declare_vs_input_vgprs(args);
} else {
declare_tes_input_vgprs(args);
}
} else {
declare_global_input_sgprs(args, &user_sgpr_info);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->ac.view_index);
}
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[0]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[1]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.gs_prim_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[2]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[3]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[4]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[5]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.gs_invocation_id);
}
break;
case MESA_SHADER_FRAGMENT:
declare_global_input_sgprs(args, &user_sgpr_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, NULL); /* persp pull model */
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
break;
default:
unreachable("Shader stage not implemented");
}
args->shader_info->num_input_vgprs = 0;
args->shader_info->num_input_sgprs = args->options->supports_spill ? 2 : 0;
args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
if (stage != MESA_SHADER_FRAGMENT)
args->shader_info->num_input_vgprs = args->ac.num_vgprs_used;
uint8_t user_sgpr_idx = 0;
if (args->options->supports_spill || user_sgpr_info.need_ring_offsets) {
set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
&user_sgpr_idx);
}
/* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
* the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart the count from 0. */
if (has_previous_stage)
user_sgpr_idx = 0;
set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);
switch (stage) {
case MESA_SHADER_COMPUTE:
if (args->shader_info->cs.uses_grid_size) {
set_loc_shader(args, AC_UD_CS_GRID_SIZE,
&user_sgpr_idx, 3);
}
break;
case MESA_SHADER_VERTEX:
set_vs_specific_input_locs(args, stage, has_previous_stage,
previous_stage, &user_sgpr_idx);
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_TESS_CTRL:
set_vs_specific_input_locs(args, stage, has_previous_stage,
previous_stage, &user_sgpr_idx);
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_TESS_EVAL:
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
if (previous_stage == MESA_SHADER_VERTEX)
set_vs_specific_input_locs(args, stage,
has_previous_stage,
previous_stage,
&user_sgpr_idx);
}
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_FRAGMENT:
break;
default:
unreachable("Shader stage not implemented");
}
args->shader_info->num_user_sgprs = user_sgpr_idx;
}
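The locations recorded here are what the driver reads back at draw time to program user-data registers. A hypothetical consumer:

    const struct radv_userdata_info *loc =
            &shader_info->user_sgprs_locs.shader_data[AC_UD_PUSH_CONSTANTS];
    if (loc->sgpr_idx != -1) {
            /* Write the push-constant address into the stage's user-data
             * base register + loc->sgpr_idx, loc->num_sgprs dwords wide. */
    }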

src/amd/vulkan/radv_shader_args.h

@@ -24,6 +24,7 @@
#include "ac_shader_args.h"
#include "radv_constants.h"
#include "util/list.h"
#include "compiler/shader_enums.h"
#include "amd_family.h"
struct radv_shader_args {
@@ -73,4 +74,8 @@ radv_shader_args_from_ac(struct ac_shader_args *args)
return (struct radv_shader_args *) container_of(args, radv_args, ac);
}
+void radv_declare_shader_args(struct radv_shader_args *args,
+gl_shader_stage stage,
+bool has_previous_stage,
+gl_shader_stage previous_stage);
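The radv_shader_args_from_ac() helper shown in the hunk above lets code that only receives the generic struct ac_shader_args climb back to the radv wrapper via container_of. A minimal sketch (hypothetical callback name):

    static void example_callback(struct ac_shader_args *ac_args)
    {
            struct radv_shader_args *args = radv_shader_args_from_ac(ac_args);
            /* args->shader_info and args->options are now reachable. */
    }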