diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 6431ae63a0e..8b3dff9782a 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -4053,9 +4053,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins false); break; } - case nir_intrinsic_emit_vertex: - ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs); - break; case nir_intrinsic_emit_vertex_with_counter: { unsigned stream = nir_intrinsic_stream_id(instr); LLVMValueRef next_vertex = get_src(ctx, instr->src[0]); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index 00f32c941a2..1268669a5f5 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -68,8 +68,6 @@ struct ac_shader_abi { void (*export_vertex)(struct ac_shader_abi *abi); - void (*emit_vertex)(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs); - void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream); void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream, diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 192cd0bd20f..b78957c0fe9 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -1885,27 +1885,12 @@ static LLVMValueRef ngg_gs_get_emit_primflag_ptr(struct si_shader_context *ctx, return LLVMBuildGEP2(ctx->ac.builder, vertexptr.pointee_type, vertexptr.value, gep_idx, 3, ""); } -void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LLVMValueRef *addrs) +void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, + LLVMValueRef vertexidx, LLVMValueRef *addrs) { const struct si_shader_selector *sel = ctx->shader->selector; const struct si_shader_info *info = &sel->info; LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef tmp; - const LLVMValueRef vertexidx = LLVMBuildLoad2(builder, ctx->ac.i32, ctx->gs_next_vertex[stream], ""); - - /* If this thread has already emitted the declared maximum number of - * vertices, skip the write: excessive vertex emissions are not - * supposed to have any effect. - */ - const LLVMValueRef can_emit = - LLVMBuildICmp(builder, LLVMIntULT, vertexidx, - LLVMConstInt(ctx->ac.i32, sel->info.base.gs.vertices_out, false), ""); - - tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, ""); - tmp = LLVMBuildSelect(builder, can_emit, tmp, vertexidx, ""); - LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]); - - ac_build_ifcc(&ctx->ac, can_emit, 9001); const struct ac_llvm_pointer vertexptr = ngg_gs_emit_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx), vertexidx); unsigned out_idx = 0; @@ -1923,6 +1908,13 @@ void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LL } assert(out_idx * 4 == info->gsvs_vertex_size); + /* Store the current number of emitted vertices to zero out remaining + * primitive flags in case the geometry shader doesn't emit the maximum + * number of vertices. + */ + LLVMValueRef tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, ""); + LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]); + /* Determine and store whether this vertex completed a primitive. */ const LLVMValueRef curverts = LLVMBuildLoad2(builder, ctx->ac.i32, ctx->gs_curprim_verts[stream], ""); @@ -1955,8 +1947,6 @@ void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LL tmp = LLVMBuildLoad2(builder, ctx->ac.i32, ctx->gs_generated_prims[stream], ""); tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), ""); LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]); - - ac_build_endif(&ctx->ac, 9001); } void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx) diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 3ce257881b6..daa3766c69c 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -188,7 +188,8 @@ void gfx10_ngg_culling_build_end(struct si_shader_context *ctx); void gfx10_ngg_build_end(struct si_shader_context *ctx); void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef prim_count, enum ac_prim_count count_type); -void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LLVMValueRef *addrs); +void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, + LLVMValueRef vertexidx, LLVMValueRef *addrs); void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx); void gfx10_ngg_gs_build_end(struct si_shader_context *ctx); unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 363267119bc..c5b1c123231 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -162,48 +162,23 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx) } /* Emit one vertex from the geometry shader */ -static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs) +static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, + LLVMValueRef vertexidx, LLVMValueRef *addrs) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); if (ctx->shader->key.ge.as_ngg) { - gfx10_ngg_gs_emit_vertex(ctx, stream, addrs); + gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs); return; } struct si_shader_info *info = &ctx->shader->selector->info; struct si_shader *shader = ctx->shader; LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->args.gs2vs_offset); - LLVMValueRef gs_next_vertex; - LLVMValueRef can_emit; - unsigned chan, offset; - int i; - /* Write vertex attribute values to GSVS ring */ - gs_next_vertex = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i32, ctx->gs_next_vertex[stream], ""); - - /* If this thread has already emitted the declared maximum number of - * vertices, skip the write: excessive vertex emissions are not - * supposed to have any effect. - * - * If the shader has no writes to memory, kill it instead. This skips - * further memory loads and may allow LLVM to skip to the end - * altogether. - */ - can_emit = - LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex, - LLVMConstInt(ctx->ac.i32, shader->selector->info.base.gs.vertices_out, 0), ""); - - bool use_kill = !info->base.writes_memory; - if (use_kill) { - ac_build_kill_if_false(&ctx->ac, can_emit); - } else { - ac_build_ifcc(&ctx->ac, can_emit, 6505); - } - - offset = 0; - for (i = 0; i < info->num_outputs; i++) { - for (chan = 0; chan < 4; chan++) { + unsigned offset = 0; + for (unsigned i = 0; i < info->num_outputs; i++) { + for (unsigned chan = 0; chan < 4; chan++) { if (!(info->output_usagemask[i] & (1 << chan)) || ((info->output_streams[i] >> (2 * chan)) & 3) != stream) continue; @@ -213,7 +188,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVM LLVMConstInt(ctx->ac.i32, offset * shader->selector->info.base.gs.vertices_out, 0); offset++; - voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, ""); + voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, ""); voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, 0), ""); out_val = ac_to_integer(&ctx->ac, out_val); @@ -223,9 +198,6 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVM } } - gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, ctx->ac.i32_1, ""); - LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]); - /* Signal vertex emission if vertex data was written. */ if (offset) { ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), @@ -234,9 +206,6 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVM ctx->gs_emitted_vertices = LLVMBuildAdd(ctx->ac.builder, ctx->gs_emitted_vertices, ctx->ac.i32_1, "vert"); } - - if (!use_kill) - ac_build_endif(&ctx->ac, 6505); } /* Cut one primitive from the geometry shader */ @@ -601,6 +570,6 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, void si_llvm_init_gs_callbacks(struct si_shader_context *ctx) { - ctx->abi.emit_vertex = si_llvm_emit_vertex; + ctx->abi.emit_vertex_with_counter = si_llvm_emit_vertex; ctx->abi.emit_primitive = si_llvm_emit_primitive; } diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 6fac0ae9f5d..b0c2e4ff5b5 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -297,6 +297,9 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) nir->info.stage == MESA_SHADER_GEOMETRY) NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out); + if (nir->info.stage == MESA_SHADER_GEOMETRY) + NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream); + if (nir->info.stage == MESA_SHADER_COMPUTE) { if (nir->info.cs.derivative_group == DERIVATIVE_GROUP_QUADS) { /* If we are shuffling local_invocation_id for quad derivatives, we