radeonsi: replace llvm legacy gs code with nir lowering

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19489>
This commit is contained in:
Qiang Yu
2022-12-01 09:55:57 +08:00
committed by Marge Bot
parent 6219374c4e
commit 1475339543
4 changed files with 21 additions and 92 deletions

View File

@@ -1767,8 +1767,11 @@ static void si_assign_param_offsets(nir_shader *nir, struct si_shader *shader)
si_nir_assign_param_offsets(nir, shader, slot_remap);
}
struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_args *args,
bool *free_nir, uint64_t tcs_vgpr_only_inputs)
struct nir_shader *si_get_nir_shader(struct si_shader *shader,
struct si_shader_args *args,
bool *free_nir,
uint64_t tcs_vgpr_only_inputs,
ac_nir_gs_output_info *output_info)
{
struct si_shader_selector *sel = shader->selector;
const union si_shader_key *key = &shader->key;
@@ -1916,6 +1919,9 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_
}
}
if (sel->stage == MESA_SHADER_GEOMETRY && !key->ge.as_ngg)
NIR_PASS_V(nir, ac_nir_lower_legacy_gs, false, sel->screen->use_ngg, output_info);
NIR_PASS(progress2, nir, si_nir_lower_abi, shader, args);
if (progress2 || opt_offsets)
@@ -2111,7 +2117,8 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
si_init_shader_args(shader, &args);
bool free_nir;
struct nir_shader *nir = si_get_nir_shader(shader, &args, &free_nir, 0);
struct nir_shader *nir =
si_get_nir_shader(shader, &args, &free_nir, 0, &legacy_gs_output_info.info);
/* Dump NIR before doing NIR->LLVM conversion in case the
* conversion fails. */

View File

@@ -147,8 +147,6 @@ struct si_shader_context {
LLVMValueRef gs_ngg_emit;
struct ac_llvm_pointer gs_ngg_scratch;
LLVMValueRef return_value;
LLVMValueRef gs_emitted_vertices;
};
static inline struct si_shader_context *si_shader_context_from_abi(struct ac_shader_abi *abi)
@@ -156,6 +154,9 @@ static inline struct si_shader_context *si_shader_context_from_abi(struct ac_sha
return container_of(abi, struct si_shader_context, abi);
}
/* Forward declaration of ac_nir_gs_output_info as an incomplete type.
 * The si_get_nir_shader() prototype below only takes a pointer to it, so the
 * full struct definition is not required at this point.
 * NOTE(review): the definition presumably lives in the shared ac_nir code;
 * confirm that this typedef matches the one declared there. */
struct ac_nir_gs_output_info;
typedef struct ac_nir_gs_output_info ac_nir_gs_output_info;
/* si_shader.c */
bool si_is_multi_part_shader(struct si_shader *shader);
bool si_is_merged_shader(struct si_shader *shader);
@@ -169,7 +170,8 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_
const struct si_vs_prolog_bits *prolog_key,
struct si_shader *shader_out, union si_shader_part_key *key);
struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_args *args,
bool *free_nir, uint64_t tcs_vgpr_only_inputs);
bool *free_nir, uint64_t tcs_vgpr_only_inputs,
ac_nir_gs_output_info *output_info);
void si_get_tcs_epilog_key(struct si_shader *shader, union si_shader_part_key *key);
bool si_need_ps_prolog(const union si_shader_part_key *key);
void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *key,

View File

@@ -849,8 +849,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
LLVMSetAlignment(ctx->gs_ngg_emit, 4);
} else {
si_preload_gs_rings(ctx);
ctx->gs_emitted_vertices = LLVMConstInt(ctx->ac.i32, 0, false);
}
break;
@@ -1172,7 +1170,8 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_init_shader_args(&shader_ls, ctx.args);
bool free_nir;
nir = si_get_nir_shader(&shader_ls, ctx.args, &free_nir, sel->info.tcs_vgpr_only_inputs);
nir = si_get_nir_shader(&shader_ls, ctx.args, &free_nir,
sel->info.tcs_vgpr_only_inputs, NULL);
si_update_shader_binary_info(shader, nir);
if (!si_llvm_translate_nir(&ctx, &shader_ls, nir, free_nir)) {
@@ -1246,7 +1245,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_init_shader_args(&shader_es, ctx.args);
bool free_nir;
nir = si_get_nir_shader(&shader_es, ctx.args, &free_nir, 0);
nir = si_get_nir_shader(&shader_es, ctx.args, &free_nir, 0, NULL);
si_update_shader_binary_info(shader, nir);
if (!si_llvm_translate_nir(&ctx, &shader_es, nir, free_nir)) {

View File

@@ -105,57 +105,9 @@ static LLVMValueRef ngg_get_emulated_counters_buf(struct si_shader_context *ctx)
void si_llvm_gs_build_end(struct si_shader_context *ctx)
{
struct si_shader_info UNUSED *info = &ctx->shader->selector->info;
assert(info->num_outputs <= AC_LLVM_MAX_OUTPUTS);
if (ctx->screen->info.gfx_level >= GFX10)
ac_build_waitcnt(&ctx->ac, AC_WAIT_VSTORE);
if (ctx->screen->use_ngg) {
/* Implement PIPE_STAT_QUERY_GS_PRIMITIVES for non-ngg draws because we can't
* use pipeline statistics (they would be correct but when screen->use_ngg, we
* can't know when the query is started if the next draw(s) will use ngg or not).
*/
LLVMValueRef tmp = GET_FIELD(ctx, GS_STATE_PIPELINE_STATS_EMU);
tmp = LLVMBuildTrunc(ctx->ac.builder, tmp, ctx->ac.i1, "");
ac_build_ifcc(&ctx->ac, tmp, 5229); /* if (GS_PIPELINE_STATS_EMU) */
{
LLVMValueRef prim = ctx->ac.i32_0;
switch (ctx->shader->selector->info.base.gs.output_primitive) {
case SHADER_PRIM_POINTS:
prim = ctx->gs_emitted_vertices;
break;
case SHADER_PRIM_LINE_STRIP:
prim = LLVMBuildSub(ctx->ac.builder, ctx->gs_emitted_vertices, ctx->ac.i32_1, "");
prim = ac_build_imax(&ctx->ac, prim, ctx->ac.i32_0);
break;
case SHADER_PRIM_TRIANGLE_STRIP:
prim = LLVMBuildSub(ctx->ac.builder, ctx->gs_emitted_vertices, LLVMConstInt(ctx->ac.i32, 2, 0), "");
prim = ac_build_imax(&ctx->ac, prim, ctx->ac.i32_0);
break;
}
LLVMValueRef args[] = {
prim,
ngg_get_emulated_counters_buf(ctx),
LLVMConstInt(ctx->ac.i32,
si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4,
false),
ctx->ac.i32_0, /* soffset */
ctx->ac.i32_0, /* cachepolicy */
};
ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", ctx->ac.i32, args, 5, 0);
args[0] = ctx->ac.i32_1;
args[2] = LLVMConstInt(ctx->ac.i32,
si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_INVOCATIONS) * 4,
false);
ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", ctx->ac.i32, args, 5, 0);
}
ac_build_endif(&ctx->ac, 5229);
}
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, si_get_gs_wave_id(ctx));
if (ctx->screen->info.gfx_level >= GFX9)
@@ -170,40 +122,9 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream,
assert(!ctx->shader->key.ge.as_ngg);
struct si_shader_info *info = &ctx->shader->selector->info;
struct si_shader *shader = ctx->shader;
LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->args->ac.gs2vs_offset);
unsigned offset = 0;
for (unsigned i = 0; i < info->num_outputs; i++) {
for (unsigned chan = 0; chan < 4; chan++) {
if (!(info->output_usagemask[i] & (1 << chan)) ||
((info->output_streams[i] >> (2 * chan)) & 3) != stream)
continue;
LLVMValueRef out_val = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + chan], "");
LLVMValueRef voffset =
LLVMConstInt(ctx->ac.i32, offset * shader->selector->info.base.gs.vertices_out, 0);
offset++;
voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, "");
voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, 0), "");
out_val = ac_to_integer(&ctx->ac, out_val);
ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, NULL,
voffset, soffset, ac_glc | ac_slc | ac_swizzled);
}
}
/* Signal vertex emission if vertex data was written. */
if (offset) {
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
si_get_gs_wave_id(ctx));
ctx->gs_emitted_vertices = LLVMBuildAdd(ctx->ac.builder, ctx->gs_emitted_vertices,
ctx->ac.i32_1, "vert");
}
/* Signal vertex emission */
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
si_get_gs_wave_id(ctx));
}
/* Cut one primitive from the geometry shader */