radv/llvm: reduce LDS size for tess by using NIR IO assigned locations

This matches what ACO already does.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7022>
This commit is contained in:
Samuel Pitoiset
2020-10-02 13:36:05 +02:00
parent 47e26bf334
commit cec12d4f98
4 changed files with 14 additions and 106 deletions

View File

@@ -149,8 +149,8 @@ get_tcs_in_patch_stride(struct radv_shader_context *ctx)
static LLVMValueRef
get_tcs_out_patch_stride(struct radv_shader_context *ctx)
{
uint32_t num_tcs_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->args->shader_info->tcs.patch_outputs_written);
uint32_t num_tcs_outputs = ctx->args->shader_info->tcs.num_linked_outputs;
uint32_t num_tcs_patch_outputs = ctx->args->shader_info->tcs.num_linked_patch_outputs;
uint32_t output_vertex_size = num_tcs_outputs * 16;
uint32_t pervertex_output_patch_size = ctx->shader->info.tess.tcs_vertices_out * output_vertex_size;
uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
@@ -161,7 +161,7 @@ get_tcs_out_patch_stride(struct radv_shader_context *ctx)
static LLVMValueRef
get_tcs_out_vertex_stride(struct radv_shader_context *ctx)
{
uint32_t num_tcs_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
uint32_t num_tcs_outputs = ctx->args->shader_info->tcs.num_linked_outputs;
uint32_t output_vertex_size = num_tcs_outputs * 16;
output_vertex_size /= 4;
return LLVMConstInt(ctx->ac.i32, output_vertex_size, false);
@@ -189,7 +189,7 @@ get_tcs_out_patch0_patch_data_offset(struct radv_shader_context *ctx)
uint32_t input_patch_size = ctx->args->options->key.tcs.input_vertices * input_vertex_size;
uint32_t output_patch0_offset = input_patch_size;
uint32_t num_tcs_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
uint32_t num_tcs_outputs = ctx->args->shader_info->tcs.num_linked_outputs;
uint32_t output_vertex_size = num_tcs_outputs * 16;
uint32_t pervertex_output_patch_size = ctx->shader->info.tess.tcs_vertices_out * output_vertex_size;
unsigned num_patches = ctx->tcs_num_patches;
@@ -404,9 +404,9 @@ static LLVMValueRef get_non_vertex_index_offset(struct radv_shader_context *ctx)
uint32_t num_patches = ctx->tcs_num_patches;
uint32_t num_tcs_outputs;
if (ctx->stage == MESA_SHADER_TESS_CTRL)
num_tcs_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
num_tcs_outputs = ctx->args->shader_info->tcs.num_linked_outputs;
else
num_tcs_outputs = ctx->args->options->key.tes.tcs_num_outputs;
num_tcs_outputs = ctx->args->shader_info->tes.num_linked_inputs;
uint32_t output_vertex_size = num_tcs_outputs * 16;
uint32_t pervertex_output_patch_size = ctx->shader->info.tess.tcs_vertices_out * output_vertex_size;
@@ -2031,7 +2031,7 @@ static void
handle_ls_outputs_post(struct radv_shader_context *ctx)
{
LLVMValueRef vertex_id = ctx->rel_auto_id;
uint32_t num_tcs_inputs = util_last_bit64(ctx->args->shader_info->vs.ls_outputs_written);
uint32_t num_tcs_inputs = ctx->args->shader_info->vs.num_linked_outputs;
LLVMValueRef vertex_dw_stride = LLVMConstInt(ctx->ac.i32, num_tcs_inputs * 4, false);
LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id,
vertex_dw_stride, "");
@@ -3945,12 +3945,9 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
ctx.abi.load_tess_varyings = load_tcs_varyings;
ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
ctx.abi.store_tcs_outputs = store_tcs_output;
if (shader_count == 1)
ctx.tcs_num_inputs = args->options->key.tcs.num_inputs;
else
ctx.tcs_num_inputs = util_last_bit64(args->shader_info->vs.ls_outputs_written);
unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
ctx.tcs_num_inputs = ctx.args->shader_info->tcs.num_linked_inputs;
unsigned tcs_num_outputs = ctx.args->shader_info->tcs.num_linked_outputs;
unsigned tcs_num_patch_outputs = ctx.args->shader_info->tcs.num_linked_patch_outputs;
ctx.tcs_num_patches =
get_tcs_num_patches(
ctx.args->options->key.tcs.input_vertices,
@@ -4061,8 +4058,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
}
if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
unsigned tcs_num_outputs = ctx.args->shader_info->tcs.num_linked_outputs;
unsigned tcs_num_patch_outputs = ctx.args->shader_info->tcs.num_linked_patch_outputs;
args->shader_info->tcs.num_patches = ctx.tcs_num_patches;
args->shader_info->tcs.num_lds_blocks =
calculate_tess_lds_size(

View File

@@ -2544,7 +2544,6 @@ radv_fill_shader_keys(struct radv_device *device,
if (nir[MESA_SHADER_TESS_CTRL]) {
keys[MESA_SHADER_VERTEX].vs_common_out.as_ls = true;
keys[MESA_SHADER_TESS_CTRL].tcs.num_inputs = 0;
keys[MESA_SHADER_TESS_CTRL].tcs.input_vertices = key->tess_input_vertices;
keys[MESA_SHADER_TESS_CTRL].tcs.primitive_mode = nir[MESA_SHADER_TESS_EVAL]->info.tess.primitive_mode;
@@ -2733,8 +2732,6 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
keys[MESA_SHADER_TESS_EVAL].tes.num_patches =
infos[MESA_SHADER_TESS_CTRL].tcs.num_patches;
keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs =
util_last_bit64(infos[MESA_SHADER_TESS_CTRL].tcs.outputs_written);
filled_stages |= (1 << MESA_SHADER_VERTEX);
filled_stages |= (1 << MESA_SHADER_TESS_CTRL);
@@ -2762,16 +2759,9 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
while (active_stages) {
int i = u_bit_scan(&active_stages);
if (i == MESA_SHADER_TESS_CTRL) {
keys[MESA_SHADER_TESS_CTRL].tcs.num_inputs =
util_last_bit64(infos[MESA_SHADER_VERTEX].vs.ls_outputs_written);
}
if (i == MESA_SHADER_TESS_EVAL) {
keys[MESA_SHADER_TESS_EVAL].tes.num_patches =
infos[MESA_SHADER_TESS_CTRL].tcs.num_patches;
keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs =
util_last_bit64(infos[MESA_SHADER_TESS_CTRL].tcs.outputs_written);
}
radv_nir_shader_info_init(&infos[i]);
@@ -3104,7 +3094,6 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline,
}
modules[MESA_SHADER_VERTEX] = NULL;
keys[MESA_SHADER_TESS_EVAL].tes.num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.outputs_written);
}
if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_GEOMETRY]) {
@@ -3128,12 +3117,8 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline,
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
if(modules[i] && !pipeline->shaders[i]) {
if (i == MESA_SHADER_TESS_CTRL) {
keys[MESA_SHADER_TESS_CTRL].tcs.num_inputs = util_last_bit64(pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.ls_outputs_written);
}
if (i == MESA_SHADER_TESS_EVAL) {
keys[MESA_SHADER_TESS_EVAL].tes.num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.outputs_written);
}
radv_start_feedback(stage_feedbacks[i]);

View File

@@ -91,14 +91,12 @@ struct radv_tes_variant_key {
struct radv_vs_out_key out;
uint8_t num_patches;
uint8_t tcs_num_outputs;
};
/* Variant key used when compiling a tessellation control shader (TCS). */
struct radv_tcs_variant_key {
/* NOTE(review): presumably the key of the vertex shader feeding this TCS
 * -- confirm against the code that fills it in. */
struct radv_vs_variant_key vs_key;
/* Filled from the tessellation evaluation shader's
 * info.tess.primitive_mode when the shader keys are built. */
unsigned primitive_mode;
/* Filled from the pipeline key's tess_input_vertices; multiplied by the
 * per-vertex input size to obtain the input patch size. */
unsigned input_vertices;
/* Initialised to 0 when the keys are built and later set to
 * util_last_bit64() of the vertex shader's ls_outputs_written mask. */
unsigned num_inputs;
/* One-bit flag. NOTE(review): judging by the name, set when the TES reads
 * the tessellation factors -- confirm where it is written. */
uint32_t tes_reads_tess_factors:1;
};
@@ -267,7 +265,6 @@ struct radv_shader_info {
bool is_ngg;
bool is_ngg_passthrough;
struct {
uint64_t ls_outputs_written;
uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX];
uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
bool has_vertex_buffers; /* needs vertex buffers and base/start */
@@ -339,8 +336,6 @@ struct radv_shader_info {
unsigned block_size[3];
} cs;
struct {
uint64_t outputs_written;
uint64_t patch_outputs_written;
uint64_t tes_inputs_read;
uint64_t tes_patch_inputs_read;
unsigned tcs_vertices_out;
@@ -526,30 +521,6 @@ radv_dump_shader_stats(struct radv_device *device,
struct radv_pipeline *pipeline,
gl_shader_stage stage, FILE *output);
/**
 * Translate a varying slot into a small zero-based index.
 *
 * Two independent numbering spaces are produced, both starting at 0:
 *  - patch slots: outer tess level (0), inner tess level (1), then
 *    PATCH0..TESS_MAX starting at 2;
 *  - per-vertex slots: position (0), point size (1), clip distances
 *    (2 and 3), then VAR0..VAR31 starting at 4.
 *
 * Any other slot ends in unreachable().
 */
static inline unsigned
shader_io_get_unique_index(gl_varying_slot slot)
{
	/* Patch indices are handled separately from the per-vertex ones. */
	switch (slot) {
	case VARYING_SLOT_TESS_LEVEL_OUTER:
		return 0;
	case VARYING_SLOT_TESS_LEVEL_INNER:
		return 1;
	default:
		break;
	}

	if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX)
		return 2 + (slot - VARYING_SLOT_PATCH0);

	switch (slot) {
	case VARYING_SLOT_POS:
		return 0;
	case VARYING_SLOT_PSIZ:
		return 1;
	case VARYING_SLOT_CLIP_DIST0:
		return 2;
	case VARYING_SLOT_CLIP_DIST1:
		/* Index 3 is reserved for clip distances as well. */
		return 3;
	default:
		break;
	}

	if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
		return 4 + (slot - VARYING_SLOT_VAR0);

	unreachable("illegal slot in get unique index\n");
}
static inline unsigned
calculate_tess_lds_size(enum chip_class chip_class,
unsigned tcs_num_input_vertices,

View File

@@ -31,23 +31,6 @@ static void mark_sampler_desc(const nir_variable *var,
info->desc_set_used_mask |= (1u << var->data.descriptor_set);
}
/**
 * Flag `num_slots` consecutive slots, beginning at bit `param`, as written
 * in the vertex shader's LS output mask (info->vs.ls_outputs_written).
 *
 * Bits that are already set stay set.
 */
static void mark_ls_output(struct radv_shader_info *info,
			   uint32_t param, int num_slots)
{
	/* num_slots low bits set, moved up to start at `param`. */
	const uint64_t slot_bits = (1ull << num_slots) - 1ull;

	info->vs.ls_outputs_written |= slot_bits << param;
}
/**
 * Flag `num_slots` consecutive slots, beginning at bit `param`, as written
 * by the tessellation control shader.
 *
 * Per-patch outputs are accumulated in info->tcs.patch_outputs_written and
 * all other outputs in info->tcs.outputs_written; `is_patch` picks the mask.
 */
static void mark_tess_output(struct radv_shader_info *info,
			     bool is_patch, uint32_t param, int num_slots)
{
	uint64_t *written = is_patch ? &info->tcs.patch_outputs_written
				     : &info->tcs.outputs_written;
	const uint64_t slot_bits = (1ull << num_slots) - 1ull;

	*written |= slot_bits << param;
}
static void
gather_intrinsic_load_input_info(const nir_shader *nir,
const nir_intrinsic_instr *instr,
@@ -422,18 +405,6 @@ gather_info_input_decl(const nir_shader *nir, const nir_variable *var,
}
}
/**
 * Record the LS output slots taken by one output variable.
 *
 * The variable's location is converted to a unique index with
 * shader_io_get_unique_index(), the number of slots it occupies is
 * determined, and the range is passed to mark_ls_output().
 *
 * `nir` is not used.
 */
static void
gather_info_output_decl_ls(const nir_shader *nir, const nir_variable *var,
			   struct radv_shader_info *info)
{
	const int location = var->data.location;
	const unsigned first_slot = shader_io_get_unique_index(location);
	int slot_count = glsl_count_attribute_slots(var->type, false);

	/* Compact variables: slot count is (location_frac + length) divided
	 * by four, rounded up. */
	if (var->data.compact) {
		slot_count = DIV_ROUND_UP(var->data.location_frac + glsl_get_length(var->type), 4);
	}

	mark_ls_output(info, first_slot, slot_count);
}
static void
gather_info_output_decl_ps(const nir_shader *nir, const nir_variable *var,
struct radv_shader_info *info)
@@ -497,9 +468,8 @@ gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
!key->vs_common_out.as_es)
vs_info = &info->vs.outinfo;
if (key->vs_common_out.as_ls)
gather_info_output_decl_ls(nir, var, info);
else if (key->vs_common_out.as_ngg)
/* TODO: Adjust as_ls/as_ngg. */
if (!key->vs_common_out.as_ls && key->vs_common_out.as_ngg)
gather_info_output_decl_gs(nir, var, info);
break;
case MESA_SHADER_GEOMETRY:
@@ -510,21 +480,6 @@ gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
if (!key->vs_common_out.as_es)
vs_info = &info->tes.outinfo;
break;
case MESA_SHADER_TESS_CTRL: {
unsigned param = shader_io_get_unique_index(var->data.location);
const struct glsl_type *type = var->type;
if (!var->data.patch)
type = glsl_get_array_element(var->type);
unsigned slots =
var->data.compact ? DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4)
: glsl_count_attribute_slots(type, false);
mark_tess_output(info, var->data.patch, param, slots);
break;
}
default:
break;
}