radv,aco: remap PS inputs when declaring shader arguments
LLVM seems to require all inputs for PS, so the remapping is only enabled for stages that are not compiled with LLVM.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13192>
This commit is contained in:
@@ -11257,37 +11257,8 @@ emit_streamout(isel_context* ctx, unsigned stream)
|
|||||||
Pseudo_instruction*
|
Pseudo_instruction*
|
||||||
add_startpgm(struct isel_context* ctx)
|
add_startpgm(struct isel_context* ctx)
|
||||||
{
|
{
|
||||||
unsigned arg_count = ctx->args->ac.arg_count;
|
|
||||||
if (ctx->stage == fragment_fs) {
|
|
||||||
/* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr
|
|
||||||
* itself and then communicates the results back via the ELF binary.
|
|
||||||
* Mirror what LLVM does by re-mapping the VGPR arguments here.
|
|
||||||
*
|
|
||||||
* TODO: If we made the FS input scanning code into a separate pass that
|
|
||||||
* could run before argument setup, then this wouldn't be necessary
|
|
||||||
* anymore.
|
|
||||||
*/
|
|
||||||
struct ac_shader_args* args = &ctx->args->ac;
|
|
||||||
arg_count = 0;
|
|
||||||
for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->arg_count; i++) {
|
|
||||||
if (args->args[i].file != AC_ARG_VGPR) {
|
|
||||||
arg_count++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!(ctx->program->config->spi_ps_input_addr & (1 << vgpr_arg))) {
|
|
||||||
args->args[i].skip = true;
|
|
||||||
} else {
|
|
||||||
args->args[i].offset = vgpr_reg;
|
|
||||||
vgpr_reg += args->args[i].size;
|
|
||||||
arg_count++;
|
|
||||||
}
|
|
||||||
vgpr_arg++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
aco_ptr<Pseudo_instruction> startpgm{
|
aco_ptr<Pseudo_instruction> startpgm{
|
||||||
create_instruction<Pseudo_instruction>(aco_opcode::p_startpgm, Format::PSEUDO, 0, arg_count)};
|
create_instruction<Pseudo_instruction>(aco_opcode::p_startpgm, Format::PSEUDO, 0, ctx->args->ac.arg_count)};
|
||||||
for (unsigned i = 0, arg = 0; i < ctx->args->ac.arg_count; i++) {
|
for (unsigned i = 0, arg = 0; i < ctx->args->ac.arg_count; i++) {
|
||||||
if (ctx->args->ac.args[i].skip)
|
if (ctx->args->ac.args[i].skip)
|
||||||
continue;
|
continue;
|
||||||
|
@@ -1676,6 +1676,7 @@ radv_shader_variant_compile(struct radv_device *device, struct vk_shader_module
|
|||||||
options.key = *key;
|
options.key = *key;
|
||||||
|
|
||||||
options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
|
options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
|
||||||
|
options.remap_spi_ps_input = !radv_use_llvm_for_stage(device, stage);
|
||||||
options.robust_buffer_access = device->robust_buffer_access;
|
options.robust_buffer_access = device->robust_buffer_access;
|
||||||
options.wgp_mode = radv_should_use_wgp_mode(device, stage, info);
|
options.wgp_mode = radv_should_use_wgp_mode(device, stage, info);
|
||||||
|
|
||||||
@@ -1693,6 +1694,7 @@ radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *shader
|
|||||||
gl_shader_stage stage = MESA_SHADER_VERTEX;
|
gl_shader_stage stage = MESA_SHADER_VERTEX;
|
||||||
|
|
||||||
options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
|
options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
|
||||||
|
options.remap_spi_ps_input = !radv_use_llvm_for_stage(device, stage);
|
||||||
options.key.has_multiview_view_index = multiview;
|
options.key.has_multiview_view_index = multiview;
|
||||||
options.key.optimisations_disabled = disable_optimizations;
|
options.key.optimisations_disabled = disable_optimizations;
|
||||||
|
|
||||||
|
@@ -112,6 +112,7 @@ struct radv_nir_compiler_options {
|
|||||||
bool has_image_load_dcc_bug;
|
bool has_image_load_dcc_bug;
|
||||||
bool enable_mrt_output_nan_fixup;
|
bool enable_mrt_output_nan_fixup;
|
||||||
bool wgp_mode;
|
bool wgp_mode;
|
||||||
|
bool remap_spi_ps_input;
|
||||||
enum radeon_family family;
|
enum radeon_family family;
|
||||||
enum chip_class chip_class;
|
enum chip_class chip_class;
|
||||||
const struct radeon_info *info;
|
const struct radeon_info *info;
|
||||||
|
@@ -363,6 +363,52 @@ declare_tes_input_vgprs(struct radv_shader_args *args)
|
|||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
declare_ps_input_vgprs(struct radv_shader_args *args)
|
||||||
|
{
|
||||||
|
unsigned spi_ps_input = args->shader_info->ps.spi_ps_input;
|
||||||
|
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
|
||||||
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
|
||||||
|
|
||||||
|
if (args->options->remap_spi_ps_input) {
|
||||||
|
/* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then
|
||||||
|
* communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the
|
||||||
|
* VGPR arguments here.
|
||||||
|
*/
|
||||||
|
unsigned arg_count = 0;
|
||||||
|
for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->ac.arg_count; i++) {
|
||||||
|
if (args->ac.args[i].file != AC_ARG_VGPR) {
|
||||||
|
arg_count++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(spi_ps_input & (1 << vgpr_arg))) {
|
||||||
|
args->ac.args[i].skip = true;
|
||||||
|
} else {
|
||||||
|
args->ac.args[i].offset = vgpr_reg;
|
||||||
|
vgpr_reg += args->ac.args[i].size;
|
||||||
|
arg_count++;
|
||||||
|
}
|
||||||
|
vgpr_arg++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
declare_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
|
declare_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
|
||||||
{
|
{
|
||||||
@@ -654,22 +700,8 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
|
|||||||
if (args->options->explicit_scratch_args) {
|
if (args->options->explicit_scratch_args) {
|
||||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
||||||
}
|
}
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
|
declare_ps_input_vgprs(args);
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
unreachable("Shader stage not implemented");
|
unreachable("Shader stage not implemented");
|
||||||
|
Reference in New Issue
Block a user