radv/llvm: reduce LDS size for tess by using NIR IO assigned locations
To match ACO.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7022>
This commit is contained in:
@@ -149,8 +149,8 @@ get_tcs_in_patch_stride(struct radv_shader_context *ctx)
|
||||
static LLVMValueRef
|
||||
get_tcs_out_patch_stride(struct radv_shader_context *ctx)
|
||||
{
|
||||
uint32_t num_tcs_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
|
||||
uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->args->shader_info->tcs.patch_outputs_written);
|
||||
uint32_t num_tcs_outputs = ctx->args->shader_info->tcs.num_linked_outputs;
|
||||
uint32_t num_tcs_patch_outputs = ctx->args->shader_info->tcs.num_linked_patch_outputs;
|
||||
uint32_t output_vertex_size = num_tcs_outputs * 16;
|
||||
uint32_t pervertex_output_patch_size = ctx->shader->info.tess.tcs_vertices_out * output_vertex_size;
|
||||
uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
|
||||
@@ -161,7 +161,7 @@ get_tcs_out_patch_stride(struct radv_shader_context *ctx)
|
||||
static LLVMValueRef
|
||||
get_tcs_out_vertex_stride(struct radv_shader_context *ctx)
|
||||
{
|
||||
uint32_t num_tcs_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
|
||||
uint32_t num_tcs_outputs = ctx->args->shader_info->tcs.num_linked_outputs;
|
||||
uint32_t output_vertex_size = num_tcs_outputs * 16;
|
||||
output_vertex_size /= 4;
|
||||
return LLVMConstInt(ctx->ac.i32, output_vertex_size, false);
|
||||
@@ -189,7 +189,7 @@ get_tcs_out_patch0_patch_data_offset(struct radv_shader_context *ctx)
|
||||
uint32_t input_patch_size = ctx->args->options->key.tcs.input_vertices * input_vertex_size;
|
||||
uint32_t output_patch0_offset = input_patch_size;
|
||||
|
||||
uint32_t num_tcs_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
|
||||
uint32_t num_tcs_outputs = ctx->args->shader_info->tcs.num_linked_outputs;
|
||||
uint32_t output_vertex_size = num_tcs_outputs * 16;
|
||||
uint32_t pervertex_output_patch_size = ctx->shader->info.tess.tcs_vertices_out * output_vertex_size;
|
||||
unsigned num_patches = ctx->tcs_num_patches;
|
||||
@@ -404,9 +404,9 @@ static LLVMValueRef get_non_vertex_index_offset(struct radv_shader_context *ctx)
|
||||
uint32_t num_patches = ctx->tcs_num_patches;
|
||||
uint32_t num_tcs_outputs;
|
||||
if (ctx->stage == MESA_SHADER_TESS_CTRL)
|
||||
num_tcs_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
|
||||
num_tcs_outputs = ctx->args->shader_info->tcs.num_linked_outputs;
|
||||
else
|
||||
num_tcs_outputs = ctx->args->options->key.tes.tcs_num_outputs;
|
||||
num_tcs_outputs = ctx->args->shader_info->tes.num_linked_inputs;
|
||||
|
||||
uint32_t output_vertex_size = num_tcs_outputs * 16;
|
||||
uint32_t pervertex_output_patch_size = ctx->shader->info.tess.tcs_vertices_out * output_vertex_size;
|
||||
@@ -2031,7 +2031,7 @@ static void
|
||||
handle_ls_outputs_post(struct radv_shader_context *ctx)
|
||||
{
|
||||
LLVMValueRef vertex_id = ctx->rel_auto_id;
|
||||
uint32_t num_tcs_inputs = util_last_bit64(ctx->args->shader_info->vs.ls_outputs_written);
|
||||
uint32_t num_tcs_inputs = ctx->args->shader_info->vs.num_linked_outputs;
|
||||
LLVMValueRef vertex_dw_stride = LLVMConstInt(ctx->ac.i32, num_tcs_inputs * 4, false);
|
||||
LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id,
|
||||
vertex_dw_stride, "");
|
||||
@@ -3945,12 +3945,9 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
||||
ctx.abi.load_tess_varyings = load_tcs_varyings;
|
||||
ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
|
||||
ctx.abi.store_tcs_outputs = store_tcs_output;
|
||||
if (shader_count == 1)
|
||||
ctx.tcs_num_inputs = args->options->key.tcs.num_inputs;
|
||||
else
|
||||
ctx.tcs_num_inputs = util_last_bit64(args->shader_info->vs.ls_outputs_written);
|
||||
unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
|
||||
unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
|
||||
ctx.tcs_num_inputs = ctx.args->shader_info->tcs.num_linked_inputs;
|
||||
unsigned tcs_num_outputs = ctx.args->shader_info->tcs.num_linked_outputs;
|
||||
unsigned tcs_num_patch_outputs = ctx.args->shader_info->tcs.num_linked_patch_outputs;
|
||||
ctx.tcs_num_patches =
|
||||
get_tcs_num_patches(
|
||||
ctx.args->options->key.tcs.input_vertices,
|
||||
@@ -4061,8 +4058,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
||||
}
|
||||
|
||||
if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
|
||||
unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
|
||||
unsigned tcs_num_outputs = ctx.args->shader_info->tcs.num_linked_outputs;
|
||||
unsigned tcs_num_patch_outputs = ctx.args->shader_info->tcs.num_linked_patch_outputs;
|
||||
args->shader_info->tcs.num_patches = ctx.tcs_num_patches;
|
||||
args->shader_info->tcs.num_lds_blocks =
|
||||
calculate_tess_lds_size(
|
||||
|
@@ -2544,7 +2544,6 @@ radv_fill_shader_keys(struct radv_device *device,
|
||||
|
||||
if (nir[MESA_SHADER_TESS_CTRL]) {
|
||||
keys[MESA_SHADER_VERTEX].vs_common_out.as_ls = true;
|
||||
keys[MESA_SHADER_TESS_CTRL].tcs.num_inputs = 0;
|
||||
keys[MESA_SHADER_TESS_CTRL].tcs.input_vertices = key->tess_input_vertices;
|
||||
keys[MESA_SHADER_TESS_CTRL].tcs.primitive_mode = nir[MESA_SHADER_TESS_EVAL]->info.tess.primitive_mode;
|
||||
|
||||
@@ -2733,8 +2732,6 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
|
||||
|
||||
keys[MESA_SHADER_TESS_EVAL].tes.num_patches =
|
||||
infos[MESA_SHADER_TESS_CTRL].tcs.num_patches;
|
||||
keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs =
|
||||
util_last_bit64(infos[MESA_SHADER_TESS_CTRL].tcs.outputs_written);
|
||||
|
||||
filled_stages |= (1 << MESA_SHADER_VERTEX);
|
||||
filled_stages |= (1 << MESA_SHADER_TESS_CTRL);
|
||||
@@ -2762,16 +2759,9 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
|
||||
while (active_stages) {
|
||||
int i = u_bit_scan(&active_stages);
|
||||
|
||||
if (i == MESA_SHADER_TESS_CTRL) {
|
||||
keys[MESA_SHADER_TESS_CTRL].tcs.num_inputs =
|
||||
util_last_bit64(infos[MESA_SHADER_VERTEX].vs.ls_outputs_written);
|
||||
}
|
||||
|
||||
if (i == MESA_SHADER_TESS_EVAL) {
|
||||
keys[MESA_SHADER_TESS_EVAL].tes.num_patches =
|
||||
infos[MESA_SHADER_TESS_CTRL].tcs.num_patches;
|
||||
keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs =
|
||||
util_last_bit64(infos[MESA_SHADER_TESS_CTRL].tcs.outputs_written);
|
||||
}
|
||||
|
||||
radv_nir_shader_info_init(&infos[i]);
|
||||
@@ -3104,7 +3094,6 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline,
|
||||
}
|
||||
modules[MESA_SHADER_VERTEX] = NULL;
|
||||
keys[MESA_SHADER_TESS_EVAL].tes.num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
|
||||
keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.outputs_written);
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_GEOMETRY]) {
|
||||
@@ -3128,12 +3117,8 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline,
|
||||
|
||||
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
||||
if(modules[i] && !pipeline->shaders[i]) {
|
||||
if (i == MESA_SHADER_TESS_CTRL) {
|
||||
keys[MESA_SHADER_TESS_CTRL].tcs.num_inputs = util_last_bit64(pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.ls_outputs_written);
|
||||
}
|
||||
if (i == MESA_SHADER_TESS_EVAL) {
|
||||
keys[MESA_SHADER_TESS_EVAL].tes.num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
|
||||
keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.outputs_written);
|
||||
}
|
||||
|
||||
radv_start_feedback(stage_feedbacks[i]);
|
||||
|
@@ -91,14 +91,12 @@ struct radv_tes_variant_key {
|
||||
struct radv_vs_out_key out;
|
||||
|
||||
uint8_t num_patches;
|
||||
uint8_t tcs_num_outputs;
|
||||
};
|
||||
|
||||
struct radv_tcs_variant_key {
|
||||
struct radv_vs_variant_key vs_key;
|
||||
unsigned primitive_mode;
|
||||
unsigned input_vertices;
|
||||
unsigned num_inputs;
|
||||
uint32_t tes_reads_tess_factors:1;
|
||||
};
|
||||
|
||||
@@ -267,7 +265,6 @@ struct radv_shader_info {
|
||||
bool is_ngg;
|
||||
bool is_ngg_passthrough;
|
||||
struct {
|
||||
uint64_t ls_outputs_written;
|
||||
uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX];
|
||||
uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
|
||||
bool has_vertex_buffers; /* needs vertex buffers and base/start */
|
||||
@@ -339,8 +336,6 @@ struct radv_shader_info {
|
||||
unsigned block_size[3];
|
||||
} cs;
|
||||
struct {
|
||||
uint64_t outputs_written;
|
||||
uint64_t patch_outputs_written;
|
||||
uint64_t tes_inputs_read;
|
||||
uint64_t tes_patch_inputs_read;
|
||||
unsigned tcs_vertices_out;
|
||||
@@ -526,30 +521,6 @@ radv_dump_shader_stats(struct radv_device *device,
|
||||
struct radv_pipeline *pipeline,
|
||||
gl_shader_stage stage, FILE *output);
|
||||
|
||||
static inline unsigned
|
||||
shader_io_get_unique_index(gl_varying_slot slot)
|
||||
{
|
||||
/* handle patch indices separate */
|
||||
if (slot == VARYING_SLOT_TESS_LEVEL_OUTER)
|
||||
return 0;
|
||||
if (slot == VARYING_SLOT_TESS_LEVEL_INNER)
|
||||
return 1;
|
||||
if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX)
|
||||
return 2 + (slot - VARYING_SLOT_PATCH0);
|
||||
if (slot == VARYING_SLOT_POS)
|
||||
return 0;
|
||||
if (slot == VARYING_SLOT_PSIZ)
|
||||
return 1;
|
||||
if (slot == VARYING_SLOT_CLIP_DIST0)
|
||||
return 2;
|
||||
if (slot == VARYING_SLOT_CLIP_DIST1)
|
||||
return 3;
|
||||
/* 3 is reserved for clip dist as well */
|
||||
if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
|
||||
return 4 + (slot - VARYING_SLOT_VAR0);
|
||||
unreachable("illegal slot in get unique index\n");
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
calculate_tess_lds_size(enum chip_class chip_class,
|
||||
unsigned tcs_num_input_vertices,
|
||||
|
@@ -31,23 +31,6 @@ static void mark_sampler_desc(const nir_variable *var,
|
||||
info->desc_set_used_mask |= (1u << var->data.descriptor_set);
|
||||
}
|
||||
|
||||
static void mark_ls_output(struct radv_shader_info *info,
|
||||
uint32_t param, int num_slots)
|
||||
{
|
||||
uint64_t mask = (1ull << num_slots) - 1ull;
|
||||
info->vs.ls_outputs_written |= (mask << param);
|
||||
}
|
||||
|
||||
static void mark_tess_output(struct radv_shader_info *info,
|
||||
bool is_patch, uint32_t param, int num_slots)
|
||||
{
|
||||
uint64_t mask = (1ull << num_slots) - 1ull;
|
||||
if (is_patch)
|
||||
info->tcs.patch_outputs_written |= (mask << param);
|
||||
else
|
||||
info->tcs.outputs_written |= (mask << param);
|
||||
}
|
||||
|
||||
static void
|
||||
gather_intrinsic_load_input_info(const nir_shader *nir,
|
||||
const nir_intrinsic_instr *instr,
|
||||
@@ -422,18 +405,6 @@ gather_info_input_decl(const nir_shader *nir, const nir_variable *var,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
gather_info_output_decl_ls(const nir_shader *nir, const nir_variable *var,
|
||||
struct radv_shader_info *info)
|
||||
{
|
||||
int idx = var->data.location;
|
||||
unsigned param = shader_io_get_unique_index(idx);
|
||||
int num_slots = glsl_count_attribute_slots(var->type, false);
|
||||
if (var->data.compact)
|
||||
num_slots = DIV_ROUND_UP(var->data.location_frac + glsl_get_length(var->type), 4);
|
||||
mark_ls_output(info, param, num_slots);
|
||||
}
|
||||
|
||||
static void
|
||||
gather_info_output_decl_ps(const nir_shader *nir, const nir_variable *var,
|
||||
struct radv_shader_info *info)
|
||||
@@ -497,9 +468,8 @@ gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
|
||||
!key->vs_common_out.as_es)
|
||||
vs_info = &info->vs.outinfo;
|
||||
|
||||
if (key->vs_common_out.as_ls)
|
||||
gather_info_output_decl_ls(nir, var, info);
|
||||
else if (key->vs_common_out.as_ngg)
|
||||
/* TODO: Adjust as_ls/as_ngg. */
|
||||
if (!key->vs_common_out.as_ls && key->vs_common_out.as_ngg)
|
||||
gather_info_output_decl_gs(nir, var, info);
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
@@ -510,21 +480,6 @@ gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
|
||||
if (!key->vs_common_out.as_es)
|
||||
vs_info = &info->tes.outinfo;
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL: {
|
||||
unsigned param = shader_io_get_unique_index(var->data.location);
|
||||
const struct glsl_type *type = var->type;
|
||||
|
||||
if (!var->data.patch)
|
||||
type = glsl_get_array_element(var->type);
|
||||
|
||||
unsigned slots =
|
||||
var->data.compact ? DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4)
|
||||
: glsl_count_attribute_slots(type, false);
|
||||
|
||||
mark_tess_output(info, var->data.patch, param, slots);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
Reference in New Issue
Block a user