diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 142e094d7b5..adc09f4183d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1767,8 +1767,11 @@ static void si_assign_param_offsets(nir_shader *nir, struct si_shader *shader) si_nir_assign_param_offsets(nir, shader, slot_remap); } -struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_args *args, - bool *free_nir, uint64_t tcs_vgpr_only_inputs) +struct nir_shader *si_get_nir_shader(struct si_shader *shader, + struct si_shader_args *args, + bool *free_nir, + uint64_t tcs_vgpr_only_inputs, + ac_nir_gs_output_info *output_info) { struct si_shader_selector *sel = shader->selector; const union si_shader_key *key = &shader->key; @@ -1916,6 +1919,9 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_ } } + if (sel->stage == MESA_SHADER_GEOMETRY && !key->ge.as_ngg) + NIR_PASS_V(nir, ac_nir_lower_legacy_gs, false, sel->screen->use_ngg, output_info); + NIR_PASS(progress2, nir, si_nir_lower_abi, shader, args); if (progress2 || opt_offsets) @@ -2111,7 +2117,8 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi si_init_shader_args(shader, &args); bool free_nir; - struct nir_shader *nir = si_get_nir_shader(shader, &args, &free_nir, 0); + struct nir_shader *nir = + si_get_nir_shader(shader, &args, &free_nir, 0, &legacy_gs_output_info.info); /* Dump NIR before doing NIR->LLVM conversion in case the * conversion fails. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 4f7a6b80ab3..3207c52c547 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -147,8 +147,6 @@ struct si_shader_context { LLVMValueRef gs_ngg_emit; struct ac_llvm_pointer gs_ngg_scratch; LLVMValueRef return_value; - - LLVMValueRef gs_emitted_vertices; }; static inline struct si_shader_context *si_shader_context_from_abi(struct ac_shader_abi *abi) @@ -156,6 +154,9 @@ static inline struct si_shader_context *si_shader_context_from_abi(struct ac_sha return container_of(abi, struct si_shader_context, abi); } +struct ac_nir_gs_output_info; +typedef struct ac_nir_gs_output_info ac_nir_gs_output_info; + /* si_shader.c */ bool si_is_multi_part_shader(struct si_shader *shader); bool si_is_merged_shader(struct si_shader *shader); @@ -169,7 +170,8 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_ const struct si_vs_prolog_bits *prolog_key, struct si_shader *shader_out, union si_shader_part_key *key); struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_args *args, - bool *free_nir, uint64_t tcs_vgpr_only_inputs); + bool *free_nir, uint64_t tcs_vgpr_only_inputs, + ac_nir_gs_output_info *output_info); void si_get_tcs_epilog_key(struct si_shader *shader, union si_shader_part_key *key); bool si_need_ps_prolog(const union si_shader_part_key *key); void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *key, diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 37f491eb47a..aa9cf565db1 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -849,8 +849,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad LLVMSetAlignment(ctx->gs_ngg_emit, 4); } else { si_preload_gs_rings(ctx); - - ctx->gs_emitted_vertices = LLVMConstInt(ctx->ac.i32, 0, false); } break; @@ -1172,7 +1170,8 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * si_init_shader_args(&shader_ls, ctx.args); bool free_nir; - nir = si_get_nir_shader(&shader_ls, ctx.args, &free_nir, sel->info.tcs_vgpr_only_inputs); + nir = si_get_nir_shader(&shader_ls, ctx.args, &free_nir, + sel->info.tcs_vgpr_only_inputs, NULL); si_update_shader_binary_info(shader, nir); if (!si_llvm_translate_nir(&ctx, &shader_ls, nir, free_nir)) { @@ -1246,7 +1245,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * si_init_shader_args(&shader_es, ctx.args); bool free_nir; - nir = si_get_nir_shader(&shader_es, ctx.args, &free_nir, 0); + nir = si_get_nir_shader(&shader_es, ctx.args, &free_nir, 0, NULL); si_update_shader_binary_info(shader, nir); if (!si_llvm_translate_nir(&ctx, &shader_es, nir, free_nir)) { diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 36c07d21a7e..30befe58f53 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -105,57 +105,9 @@ static LLVMValueRef ngg_get_emulated_counters_buf(struct si_shader_context *ctx) void si_llvm_gs_build_end(struct si_shader_context *ctx) { - struct si_shader_info UNUSED *info = &ctx->shader->selector->info; - - assert(info->num_outputs <= AC_LLVM_MAX_OUTPUTS); - if (ctx->screen->info.gfx_level >= GFX10) ac_build_waitcnt(&ctx->ac, AC_WAIT_VSTORE); - if (ctx->screen->use_ngg) { - /* Implement PIPE_STAT_QUERY_GS_PRIMITIVES for non-ngg draws because we can't - * use pipeline statistics (they would be correct but when screen->use_ngg, we - * can't know when the query is started if the next draw(s) will use ngg or not). - */ - LLVMValueRef tmp = GET_FIELD(ctx, GS_STATE_PIPELINE_STATS_EMU); - tmp = LLVMBuildTrunc(ctx->ac.builder, tmp, ctx->ac.i1, ""); - ac_build_ifcc(&ctx->ac, tmp, 5229); /* if (GS_PIPELINE_STATS_EMU) */ - { - LLVMValueRef prim = ctx->ac.i32_0; - switch (ctx->shader->selector->info.base.gs.output_primitive) { - case SHADER_PRIM_POINTS: - prim = ctx->gs_emitted_vertices; - break; - case SHADER_PRIM_LINE_STRIP: - prim = LLVMBuildSub(ctx->ac.builder, ctx->gs_emitted_vertices, ctx->ac.i32_1, ""); - prim = ac_build_imax(&ctx->ac, prim, ctx->ac.i32_0); - break; - case SHADER_PRIM_TRIANGLE_STRIP: - prim = LLVMBuildSub(ctx->ac.builder, ctx->gs_emitted_vertices, LLVMConstInt(ctx->ac.i32, 2, 0), ""); - prim = ac_build_imax(&ctx->ac, prim, ctx->ac.i32_0); - break; - } - - LLVMValueRef args[] = { - prim, - ngg_get_emulated_counters_buf(ctx), - LLVMConstInt(ctx->ac.i32, - si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4, - false), - ctx->ac.i32_0, /* soffset */ - ctx->ac.i32_0, /* cachepolicy */ - }; - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", ctx->ac.i32, args, 5, 0); - - args[0] = ctx->ac.i32_1; - args[2] = LLVMConstInt(ctx->ac.i32, - si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_INVOCATIONS) * 4, - false); - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", ctx->ac.i32, args, 5, 0); - } - ac_build_endif(&ctx->ac, 5229); - } - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, si_get_gs_wave_id(ctx)); if (ctx->screen->info.gfx_level >= GFX9) @@ -170,40 +122,9 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, assert(!ctx->shader->key.ge.as_ngg); - struct si_shader_info *info = &ctx->shader->selector->info; - struct si_shader *shader = ctx->shader; - LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->args->ac.gs2vs_offset); - - unsigned offset = 0; - for (unsigned i = 0; i < info->num_outputs; i++) { - for (unsigned chan = 0; chan < 4; chan++) { - if (!(info->output_usagemask[i] & (1 << chan)) || - ((info->output_streams[i] >> (2 * chan)) & 3) != stream) - continue; - - LLVMValueRef out_val = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + chan], ""); - LLVMValueRef voffset = - LLVMConstInt(ctx->ac.i32, offset * shader->selector->info.base.gs.vertices_out, 0); - offset++; - - voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, ""); - voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, 0), ""); - - out_val = ac_to_integer(&ctx->ac, out_val); - - ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, NULL, - voffset, soffset, ac_glc | ac_slc | ac_swizzled); - } - } - - /* Signal vertex emission if vertex data was written. */ - if (offset) { - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), - si_get_gs_wave_id(ctx)); - - ctx->gs_emitted_vertices = LLVMBuildAdd(ctx->ac.builder, ctx->gs_emitted_vertices, - ctx->ac.i32_1, "vert"); - } + /* Signal vertex emission */ + ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), + si_get_gs_wave_id(ctx)); } /* Cut one primitive from the geometry shader */