diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py index 9a238674dd1..ea4065de6b9 100644 --- a/src/amd/compiler/aco_builder_h.py +++ b/src/amd/compiler/aco_builder_h.py @@ -101,8 +101,8 @@ aco_ptr create_s_mov(Definition dst, Operand src); enum sendmsg { sendmsg_none = 0, - _sendmsg_gs = 2, /* gfx6 to gfx10.3 */ - _sendmsg_gs_done = 3, /* gfx6 to gfx10.3 */ + sendmsg_gs = 2, /* gfx6 to gfx10.3 */ + sendmsg_gs_done = 3, /* gfx6 to gfx10.3 */ sendmsg_hs_tessfactor = 2, /* gfx11+ */ sendmsg_dealloc_vgprs = 3, /* gfx11+ */ sendmsg_save_wave = 4, /* gfx8 to gfx10.3 */ @@ -127,20 +127,6 @@ enum sendmsg_rtn { sendmsg_rtn_mask = 0xff, }; -inline sendmsg -sendmsg_gs(bool cut, bool emit, unsigned stream) -{ - assert(stream < 4); - return (sendmsg)((unsigned)_sendmsg_gs | (cut << 4) | (emit << 5) | (stream << 8)); -} - -inline sendmsg -sendmsg_gs_done(bool cut, bool emit, unsigned stream) -{ - assert(stream < 4); - return (sendmsg)((unsigned)_sendmsg_gs_done | (cut << 4) | (emit << 5) | (stream << 8)); -} - enum bperm_swiz { bperm_b1_sign = 8, bperm_b3_sign = 9, diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index db33b83e4ea..2c9b19e2f98 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -8998,20 +8998,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) break; } - case nir_intrinsic_emit_vertex_with_counter: { - assert(ctx->stage.hw == HWStage::GS); - unsigned stream = nir_intrinsic_stream_id(instr); - bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(false, true, stream)); - break; - } - case nir_intrinsic_end_primitive_with_counter: { - if (ctx->stage.hw != HWStage::NGG) { - unsigned stream = nir_intrinsic_stream_id(instr); - bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, - sendmsg_gs(true, false, stream)); - } - break; - } case nir_intrinsic_sendmsg_amd: { unsigned imm = nir_intrinsic_base(instr); Temp m0_content = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); @@ -9035,24 +9021,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), lanecount_to_mask(ctx, src)); break; } - case nir_intrinsic_alloc_vertices_and_primitives_amd: { - assert(ctx->stage.hw == HWStage::NGG); - Temp num_vertices = get_ssa_temp(ctx, instr->src[0].ssa); - Temp num_primitives = get_ssa_temp(ctx, instr->src[1].ssa); - - /* Put the number of vertices and primitives into m0 for the GS_ALLOC_REQ */ - Temp tmp = - bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), - num_primitives, Operand::c32(12u)); - tmp = bld.sop2(aco_opcode::s_or_b32, bld.m0(bld.def(s1)), bld.def(s1, scc), - tmp, num_vertices); - - /* Request the SPI to allocate space for the primitives and vertices - * that will be exported by the threadgroup. - */ - bld.sopp(aco_opcode::s_sendmsg, bld.m0(tmp), -1, sendmsg_gs_alloc_req); - break; - } case nir_intrinsic_gds_atomic_add_amd: { Temp store_val = get_ssa_temp(ctx, instr->src[0].ssa); Temp gds_addr = get_ssa_temp(ctx, instr->src[1].ssa); @@ -11410,14 +11378,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const bld.barrier(aco_opcode::p_barrier, memory_sync_info(storage_shared, semantic_acqrel, scope), scope); } - - if (ctx.stage == vertex_geometry_gs || ctx.stage == tess_eval_geometry_gs) { - ctx.gs_wave_id = bld.pseudo(aco_opcode::p_extract, bld.def(s1, m0), bld.def(s1, scc), - get_arg(&ctx, args->merged_wave_info), Operand::c32(2u), - Operand::c32(8u), Operand::zero()); - } - } else if (ctx.stage == geometry_gs) - ctx.gs_wave_id = get_arg(&ctx, args->gs_wave_id); + } visit_cf_list(&ctx, &func->body); diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h index 65a32a9b561..771c608d9ec 100644 --- a/src/amd/compiler/aco_instruction_selection.h +++ b/src/amd/compiler/aco_instruction_selection.h @@ -91,9 +91,6 @@ struct isel_context { Temp arg_temps[AC_MAX_ARGS]; - /* GS inputs */ - Temp gs_wave_id; - /* VS output information */ bool export_clip_dists; unsigned num_clip_distances; diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index d5e24b18bfa..c069baa368c 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -334,18 +334,18 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins } case aco_opcode::s_sendmsg: { unsigned id = imm & sendmsg_id_mask; - static_assert(_sendmsg_gs == sendmsg_hs_tessfactor); - static_assert(_sendmsg_gs_done == sendmsg_dealloc_vgprs); + static_assert(sendmsg_gs == sendmsg_hs_tessfactor); + static_assert(sendmsg_gs_done == sendmsg_dealloc_vgprs); switch (id) { case sendmsg_none: fprintf(output, " sendmsg(MSG_NONE)"); break; - case _sendmsg_gs: + case sendmsg_gs: if (gfx_level >= GFX11) fprintf(output, " sendmsg(hs_tessfactor)"); else fprintf(output, " sendmsg(gs%s%s, %u)", imm & 0x10 ? ", cut" : "", imm & 0x20 ? ", emit" : "", imm >> 8); break; - case _sendmsg_gs_done: + case sendmsg_gs_done: if (gfx_level >= GFX11) fprintf(output, " sendmsg(dealloc_vgprs)"); else diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 69225ee2c6c..3531c0562da 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -424,7 +424,7 @@ bool is_done_sendmsg(amd_gfx_level gfx_level, const Instruction* instr) { if (gfx_level <= GFX10_3 && instr->opcode == aco_opcode::s_sendmsg) - return (instr->sopp().imm & sendmsg_id_mask) == _sendmsg_gs_done; + return (instr->sopp().imm & sendmsg_id_mask) == sendmsg_gs_done; return false; } diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index ce6a97e831d..55a178ea23c 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3832,16 +3832,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins nir_intrinsic_io_semantics(instr).high_16bits); break; } - case nir_intrinsic_emit_vertex_with_counter: { - unsigned stream = nir_intrinsic_stream_id(instr); - LLVMValueRef next_vertex = get_src(ctx, instr->src[0]); - ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs); - break; - } - case nir_intrinsic_end_primitive: - case nir_intrinsic_end_primitive_with_counter: - ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr)); - break; case nir_intrinsic_sendmsg_amd: { unsigned imm = nir_intrinsic_base(instr); LLVMValueRef m0_content = get_src(ctx, instr->src[0]); @@ -4122,22 +4112,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins case nir_intrinsic_load_workgroup_num_input_primitives_amd: result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), 22, 9); break; - case nir_intrinsic_alloc_vertices_and_primitives_amd: { - /* The caller should only call this conditionally for wave 0. - * - * Send GS Alloc Req message from the first wave of the group to SPI. - * Message payload is: - * - bits 0..10: vertices in group - * - bits 12..22: primitives in group - */ - LLVMValueRef vtx_cnt = get_src(ctx, instr->src[0]); - LLVMValueRef prim_cnt = get_src(ctx, instr->src[1]); - LLVMValueRef msg = LLVMBuildShl(ctx->ac.builder, prim_cnt, - LLVMConstInt(ctx->ac.i32, 12, false), ""); - msg = LLVMBuildOr(ctx->ac.builder, msg, vtx_cnt, ""); - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_ALLOC_REQ, msg); - break; - } case nir_intrinsic_overwrite_vs_arguments_amd: ctx->abi->vertex_id_replaced = get_src(ctx, instr->src[0]); ctx->abi->instance_id_replaced = get_src(ctx, instr->src[1]); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index ec37a008c52..a17ad6cfe55 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -59,11 +59,6 @@ struct ac_shader_abi { /* Varying -> attribute number mapping. Also NIR-only */ unsigned fs_input_attr_indices[MAX_VARYING]; - void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream); - - void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream, - LLVMValueRef vertexidx, LLVMValueRef *addrs); - LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type, LLVMValueRef vertex_index, LLVMValueRef param_index, unsigned driver_location, unsigned component, diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index ffb41412bfd..81b19319e71 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -168,23 +168,6 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has } } -static void -visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef vertexidx, - LLVMValueRef *addrs) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), - ctx->gs_wave_id); -} - -static void -visit_end_primitive(struct ac_shader_abi *abi, unsigned stream) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), - ctx->gs_wave_id); -} - static LLVMValueRef radv_load_base_vertex(struct ac_shader_abi *abi, bool non_indexed_is_zero) { @@ -438,11 +421,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.shader = shaders[shader_idx]; ctx.output_mask = 0; - if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && !ctx.shader_info->is_ngg) { - ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter; - ctx.abi.emit_primitive = visit_end_primitive; - } - if (shader_idx && !(shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && info->is_ngg)) { /* Execute a barrier before the second shader in * a merged shader. diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index f7dc238587d..57645cb9840 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1478,8 +1478,6 @@ intrinsic("load_cull_any_enabled_amd", dest_comp=1, bit_sizes=[1], flags=[CAN_EL intrinsic("load_cull_small_prim_precision_amd", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER]) # Initial edge flags in a Vertex Shader, packed into the format the HW needs for primitive export. intrinsic("load_initial_edgeflags_amd", src_comp=[], dest_comp=1, bit_sizes=[32], indices=[]) -# Allocates export space for vertices and primitives. src[] = {num_vertices, num_primitives}. -intrinsic("alloc_vertices_and_primitives_amd", src_comp=[1, 1], indices=[]) # Corresponds to s_sendmsg in the GCN/RDNA ISA, src[] = { m0_content }, BASE = imm intrinsic("sendmsg_amd", src_comp=[1], indices=[BASE]) # Overwrites VS input registers, for use with vertex compaction after culling. src = {vertex_id, instance_id}. diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index f57cab7a00d..94ab629e287 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -218,7 +218,6 @@ void si_llvm_es_build_end(struct si_shader_context *ctx); void si_preload_esgs_ring(struct si_shader_context *ctx); void si_preload_gs_rings(struct si_shader_context *ctx); void si_llvm_gs_build_end(struct si_shader_context *ctx); -void si_llvm_init_gs_callbacks(struct si_shader_context *ctx); /* si_shader_llvm_tess.c */ LLVMValueRef si_get_rel_patch_id(struct si_shader_context *ctx); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 9625aea0249..e3ee71d8d2f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -808,8 +808,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade break; case MESA_SHADER_GEOMETRY: - si_llvm_init_gs_callbacks(ctx); - if (ctx->shader->key.ge.as_ngg) { LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader)); ctx->gs_ngg_scratch = (struct ac_llvm_pointer) { diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 1b94a3e0472..58aaf944526 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -88,45 +88,12 @@ void si_llvm_es_build_end(struct si_shader_context *ctx) si_set_es_return_value_for_gs(ctx); } -static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx) -{ - if (ctx->screen->info.gfx_level >= GFX9) - return si_unpack_param(ctx, ctx->args->ac.merged_wave_info, 16, 8); - else - return ac_get_arg(&ctx->ac, ctx->args->ac.gs_wave_id); -} - void si_llvm_gs_build_end(struct si_shader_context *ctx) { if (ctx->screen->info.gfx_level >= GFX9) ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label); } -/* Emit one vertex from the geometry shader */ -static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, - LLVMValueRef vertexidx, LLVMValueRef *addrs) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - - assert(!ctx->shader->key.ge.as_ngg); - - /* Signal vertex emission */ - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), - si_get_gs_wave_id(ctx)); -} - -/* Cut one primitive from the geometry shader */ -static void si_llvm_emit_primitive(struct ac_shader_abi *abi, unsigned stream) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - - assert(!ctx->shader->key.ge.as_ngg); - - /* Signal primitive cut */ - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), - si_get_gs_wave_id(ctx)); -} - void si_preload_esgs_ring(struct si_shader_context *ctx) { LLVMBuilderRef builder = ctx->ac.builder; @@ -247,9 +214,3 @@ void si_preload_gs_rings(struct si_shader_context *ctx) ctx->gsvs_ring[stream] = ring; } } - -void si_llvm_init_gs_callbacks(struct si_shader_context *ctx) -{ - ctx->abi.emit_vertex_with_counter = si_llvm_emit_vertex; - ctx->abi.emit_primitive = si_llvm_emit_primitive; -}