radv/ac: eliminate unused vertex shader outputs. (v2)
This is ported from radeonsi. At least one Talos shader drops an export due to this, which also saves some VGPR usage. v2: use shared code. Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
@@ -31,6 +31,8 @@
|
||||
#include "util/bitscan.h"
|
||||
#include <llvm-c/Transforms/Scalar.h>
|
||||
#include "ac_shader_info.h"
|
||||
#include "ac_exp_param.h"
|
||||
|
||||
enum radeon_llvm_calling_convention {
|
||||
RADEON_LLVM_AMDGPU_VS = 87,
|
||||
RADEON_LLVM_AMDGPU_GS = 88,
|
||||
@@ -5133,7 +5135,7 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
|
||||
LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL;
|
||||
int i;
|
||||
|
||||
memset(outinfo->vs_output_param_offset, EXP_PARAM_UNDEFINED,
|
||||
memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
|
||||
sizeof(outinfo->vs_output_param_offset));
|
||||
|
||||
if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) {
|
||||
@@ -5757,6 +5759,37 @@ static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
|
||||
LLVMDisposePassManager(passmgr);
|
||||
}
|
||||
|
||||
static void
|
||||
ac_nir_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx)
|
||||
{
|
||||
struct ac_vs_output_info *outinfo;
|
||||
|
||||
if (ctx->stage == MESA_SHADER_FRAGMENT ||
|
||||
ctx->stage == MESA_SHADER_COMPUTE ||
|
||||
ctx->stage == MESA_SHADER_TESS_CTRL ||
|
||||
ctx->stage == MESA_SHADER_GEOMETRY)
|
||||
return;
|
||||
|
||||
if (ctx->stage == MESA_SHADER_VERTEX) {
|
||||
if (ctx->options->key.vs.as_ls ||
|
||||
ctx->options->key.vs.as_es)
|
||||
return;
|
||||
outinfo = &ctx->shader_info->vs.outinfo;
|
||||
}
|
||||
|
||||
if (ctx->stage == MESA_SHADER_TESS_EVAL) {
|
||||
if (ctx->options->key.vs.as_es)
|
||||
return;
|
||||
outinfo = &ctx->shader_info->tes.outinfo;
|
||||
}
|
||||
|
||||
ac_eliminate_const_vs_outputs(&ctx->ac,
|
||||
ctx->main_function,
|
||||
outinfo->vs_output_param_offset,
|
||||
VARYING_SLOT_MAX,
|
||||
&outinfo->param_exports);
|
||||
}
|
||||
|
||||
static void
|
||||
ac_setup_rings(struct nir_to_llvm_context *ctx)
|
||||
{
|
||||
@@ -5894,6 +5927,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
|
||||
LLVMBuildRetVoid(ctx.builder);
|
||||
|
||||
ac_llvm_finalize_module(&ctx);
|
||||
|
||||
ac_nir_eliminate_const_vs_outputs(&ctx);
|
||||
free(ctx.locals);
|
||||
ralloc_free(ctx.defs);
|
||||
ralloc_free(ctx.phis);
|
||||
|
@@ -120,27 +120,15 @@ struct ac_userdata_locations {
|
||||
struct ac_userdata_info shader_data[AC_UD_MAX_UD];
|
||||
};
|
||||
|
||||
enum {
|
||||
/* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
|
||||
EXP_PARAM_OFFSET_0 = 0,
|
||||
EXP_PARAM_OFFSET_31 = 31,
|
||||
/* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
|
||||
EXP_PARAM_DEFAULT_VAL_0000 = 64,
|
||||
EXP_PARAM_DEFAULT_VAL_0001,
|
||||
EXP_PARAM_DEFAULT_VAL_1110,
|
||||
EXP_PARAM_DEFAULT_VAL_1111,
|
||||
EXP_PARAM_UNDEFINED = 255,
|
||||
};
|
||||
|
||||
struct ac_vs_output_info {
|
||||
uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
|
||||
uint8_t clip_dist_mask;
|
||||
uint8_t cull_dist_mask;
|
||||
uint8_t param_exports;
|
||||
bool writes_pointsize;
|
||||
bool writes_layer;
|
||||
bool writes_viewport_index;
|
||||
uint32_t export_mask;
|
||||
unsigned param_exports;
|
||||
unsigned pos_exports;
|
||||
};
|
||||
|
||||
|
@@ -41,6 +41,7 @@
|
||||
#include "ac_nir_to_llvm.h"
|
||||
#include "vk_format.h"
|
||||
#include "util/debug.h"
|
||||
#include "ac_exp_param.h"
|
||||
|
||||
void radv_shader_variant_destroy(struct radv_device *device,
|
||||
struct radv_shader_variant *variant);
|
||||
@@ -1874,13 +1875,13 @@ static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline)
|
||||
static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
|
||||
{
|
||||
uint32_t ps_input_cntl;
|
||||
if (offset <= EXP_PARAM_OFFSET_31)
|
||||
if (offset <= AC_EXP_PARAM_OFFSET_31)
|
||||
ps_input_cntl = S_028644_OFFSET(offset);
|
||||
else {
|
||||
/* The input is a DEFAULT_VAL constant. */
|
||||
assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
|
||||
offset <= EXP_PARAM_DEFAULT_VAL_1111);
|
||||
offset -= EXP_PARAM_DEFAULT_VAL_0000;
|
||||
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
|
||||
offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
|
||||
offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
|
||||
ps_input_cntl = S_028644_OFFSET(0x20) |
|
||||
S_028644_DEFAULT_VAL(offset);
|
||||
}
|
||||
@@ -1903,7 +1904,7 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline)
|
||||
|
||||
if (ps->info.fs.prim_id_input) {
|
||||
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
|
||||
if (vs_offset != EXP_PARAM_UNDEFINED) {
|
||||
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
|
||||
pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
|
||||
++ps_offset;
|
||||
}
|
||||
@@ -1911,7 +1912,7 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline)
|
||||
|
||||
if (ps->info.fs.layer_input) {
|
||||
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
|
||||
if (vs_offset != EXP_PARAM_UNDEFINED) {
|
||||
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
|
||||
pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
|
||||
++ps_offset;
|
||||
}
|
||||
@@ -1931,7 +1932,7 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline)
|
||||
continue;
|
||||
|
||||
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
|
||||
if (vs_offset == EXP_PARAM_UNDEFINED) {
|
||||
if (vs_offset == AC_EXP_PARAM_UNDEFINED) {
|
||||
pipeline->graphics.ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20);
|
||||
++ps_offset;
|
||||
continue;
|
||||
|
Reference in New Issue
Block a user