From f6b194b6483cdc4d0bba8652017e20cfba66ceff Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Sun, 25 Dec 2022 23:08:38 +0800 Subject: [PATCH] nir,ac/llvm,aco,radv,radeonsi: remove nir_export_vertex_amd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Timur Kristóf Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Part-of: --- .../compiler/aco_instruction_selection.cpp | 138 --------- src/amd/llvm/ac_nir_to_llvm.c | 3 - src/amd/llvm/ac_shader_abi.h | 2 - src/amd/vulkan/radv_nir_to_llvm.c | 227 -------------- src/compiler/nir/nir_intrinsics.py | 2 - .../drivers/radeonsi/si_shader_internal.h | 10 - src/gallium/drivers/radeonsi/si_shader_llvm.c | 30 -- .../drivers/radeonsi/si_shader_llvm_vs.c | 276 ------------------ 8 files changed, 688 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index cd464a831f2..46f97c9122a 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5297,8 +5297,6 @@ load_input_from_temps(isel_context* ctx, nir_intrinsic_instr* instr, Temp dst) return true; } -static void export_vs_varying(isel_context* ctx, int slot, bool is_pos, int* next_pos); - void visit_store_output(isel_context* ctx, nir_intrinsic_instr* instr) { @@ -8178,7 +8176,6 @@ emit_interp_center(isel_context* ctx, Temp dst, Temp bary, Temp pos1, Temp pos2) Temp merged_wave_info_to_mask(isel_context* ctx, unsigned i); Temp lanecount_to_mask(isel_context* ctx, Temp count); void ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt); -static void create_vs_exports(isel_context* ctx); Temp get_interp_param(isel_context* ctx, nir_intrinsic_op intrin, @@ -9075,10 +9072,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), lanecount_to_mask(ctx, src)); break; } - case nir_intrinsic_export_vertex_amd: { - 
create_vs_exports(ctx); - break; - } case nir_intrinsic_alloc_vertices_and_primitives_amd: { assert(ctx->stage.hw == HWStage::NGG); Temp num_vertices = get_ssa_temp(ctx, instr->src[0].ssa); @@ -10825,137 +10818,6 @@ visit_cf_list(isel_context* ctx, struct exec_list* list) return false; } -static void -export_vs_varying(isel_context* ctx, int slot, bool is_pos, int* next_pos) -{ - assert(ctx->stage.hw == HWStage::VS || ctx->stage.hw == HWStage::NGG); - - const uint8_t *vs_output_param_offset = - ctx->program->info.outinfo.vs_output_param_offset; - - assert(vs_output_param_offset); - - int offset = vs_output_param_offset[slot]; - unsigned mask = ctx->outputs.mask[slot]; - if (!is_pos && !mask) - return; - if (!is_pos && offset == AC_EXP_PARAM_UNDEFINED) - return; - aco_ptr<Export_instruction> exp{ - create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4, 0)}; - exp->enabled_mask = mask; - for (unsigned i = 0; i < 4; ++i) { - if (mask & (1 << i)) - exp->operands[i] = Operand(ctx->outputs.temps[slot * 4u + i]); - else - exp->operands[i] = Operand(v1); - } - /* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang. - * Setting valid_mask=1 prevents it and has no other effect. 
- */ - exp->valid_mask = ctx->options->gfx_level == GFX10 && is_pos && *next_pos == 0; - exp->done = false; - exp->compressed = false; - if (is_pos) - exp->dest = V_008DFC_SQ_EXP_POS + (*next_pos)++; - else - exp->dest = V_008DFC_SQ_EXP_PARAM + offset; - ctx->block->instructions.emplace_back(std::move(exp)); -} - -static void -export_vs_psiz_layer_viewport_vrs(isel_context* ctx, int* next_pos, - const aco_vp_output_info* outinfo) -{ - aco_ptr<Export_instruction> exp{ - create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4, 0)}; - exp->enabled_mask = 0; - for (unsigned i = 0; i < 4; ++i) - exp->operands[i] = Operand(v1); - if (ctx->outputs.mask[VARYING_SLOT_PSIZ]) { - exp->operands[0] = Operand(ctx->outputs.temps[VARYING_SLOT_PSIZ * 4u]); - exp->enabled_mask |= 0x1; - } - if (ctx->outputs.mask[VARYING_SLOT_LAYER] && !outinfo->writes_layer_per_primitive) { - exp->operands[2] = Operand(ctx->outputs.temps[VARYING_SLOT_LAYER * 4u]); - exp->enabled_mask |= 0x4; - } - if (ctx->outputs.mask[VARYING_SLOT_VIEWPORT] && !outinfo->writes_viewport_index_per_primitive) { - if (ctx->options->gfx_level < GFX9) { - exp->operands[3] = Operand(ctx->outputs.temps[VARYING_SLOT_VIEWPORT * 4u]); - exp->enabled_mask |= 0x8; - } else { - Builder bld(ctx->program, ctx->block); - - Temp out = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u), - Operand(ctx->outputs.temps[VARYING_SLOT_VIEWPORT * 4u])); - if (exp->operands[2].isTemp()) - out = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand(out), exp->operands[2]); - - exp->operands[2] = Operand(out); - exp->enabled_mask |= 0x4; - } - } - if (ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_SHADING_RATE]) { - exp->operands[1] = Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u]); - exp->enabled_mask |= 0x2; - } - - exp->valid_mask = ctx->options->gfx_level == GFX10 && *next_pos == 0; - exp->done = false; - exp->compressed = false; - exp->dest = V_008DFC_SQ_EXP_POS + (*next_pos)++; - ctx->block->instructions.emplace_back(std::move(exp)); 
-} - -static void -create_vs_exports(isel_context* ctx) -{ - assert(ctx->stage.hw == HWStage::VS || ctx->stage.hw == HWStage::NGG); - const aco_vp_output_info* outinfo = &ctx->program->info.outinfo; - - assert(outinfo); - ctx->block->kind |= block_kind_export_end; - - /* Hardware requires position data to always be exported, even if the - * application did not write gl_Position. - */ - ctx->outputs.mask[VARYING_SLOT_POS] = 0xf; - - /* the order these position exports are created is important */ - int next_pos = 0; - export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos); - - if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index || - outinfo->writes_primitive_shading_rate) { - export_vs_psiz_layer_viewport_vrs(ctx, &next_pos, outinfo); - } - if (ctx->num_clip_distances + ctx->num_cull_distances > 0) - export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, true, &next_pos); - if (ctx->num_clip_distances + ctx->num_cull_distances > 4) - export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, true, &next_pos); - - if (ctx->program->gfx_level >= GFX11) - return; - - if (ctx->export_clip_dists) { - if (ctx->num_clip_distances + ctx->num_cull_distances > 0) - export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, false, &next_pos); - if (ctx->num_clip_distances + ctx->num_cull_distances > 4) - export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, false, &next_pos); - } - - for (unsigned i = 0; i <= VARYING_SLOT_VAR31; ++i) { - if (i < VARYING_SLOT_VAR0 && i != VARYING_SLOT_LAYER && i != VARYING_SLOT_PRIMITIVE_ID && - i != VARYING_SLOT_VIEWPORT) - continue; - if (ctx->shader && ctx->shader->info.per_primitive_outputs & BITFIELD64_BIT(i)) - continue; - - export_vs_varying(ctx, i, false, NULL); - } -} - static bool export_fs_mrt_z(isel_context* ctx) { diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index e03bbb6905c..8fc7600b352 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -4166,9 +4166,6 @@ static 
bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins result = ac_build_intrinsic(&ctx->ac, name, return_type, args, 5, 0); break; } - case nir_intrinsic_export_vertex_amd: - ctx->abi->export_vertex(ctx->abi); - break; case nir_intrinsic_elect: result = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, visit_first_invocation(ctx), ac_get_thread_id(&ctx->ac), ""); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index 5fd5f9ec666..4943346a0a0 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -60,8 +60,6 @@ struct ac_shader_abi { /* Varying -> attribute number mapping. Also NIR-only */ unsigned fs_input_attr_indices[MAX_VARYING]; - void (*export_vertex)(struct ac_shader_abi *abi); - void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream); void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream, diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index bb709d58b9d..7e9729a417a 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -71,13 +71,6 @@ struct radv_shader_context { uint64_t output_mask; }; -struct radv_shader_output_values { - LLVMValueRef values[4]; - unsigned slot_name; - unsigned slot_index; - unsigned usage_mask; -}; - static inline struct radv_shader_context * radv_shader_context_from_abi(struct ac_shader_abi *abi) { @@ -667,16 +660,6 @@ si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values, args->out[i] = ac_to_float(&ctx->ac, args->out[i]); } -static void -radv_export_param(struct radv_shader_context *ctx, unsigned index, LLVMValueRef *values, - unsigned enabled_channels) -{ - struct ac_export_args args; - - si_llvm_init_export_args(ctx, values, enabled_channels, V_008DFC_SQ_EXP_PARAM + index, 0, &args); - ac_build_export(&ctx->ac, &args); -} - static LLVMValueRef radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan) { @@ -686,211 +669,6 @@ 
radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan) return LLVMBuildLoad2(ctx->ac.builder, type, output, ""); } -static void -radv_build_param_exports(struct radv_shader_context *ctx, struct radv_shader_output_values *outputs, - unsigned noutput, const struct radv_vs_output_info *outinfo, - bool export_clip_dists) -{ - for (unsigned i = 0; i < noutput; i++) { - unsigned slot_name = outputs[i].slot_name; - unsigned usage_mask = outputs[i].usage_mask; - - if (slot_name != VARYING_SLOT_LAYER && slot_name != VARYING_SLOT_PRIMITIVE_ID && - slot_name != VARYING_SLOT_VIEWPORT && slot_name != VARYING_SLOT_CLIP_DIST0 && - slot_name != VARYING_SLOT_CLIP_DIST1 && slot_name < VARYING_SLOT_VAR0) - continue; - - if ((slot_name == VARYING_SLOT_CLIP_DIST0 || slot_name == VARYING_SLOT_CLIP_DIST1) && - !export_clip_dists) - continue; - - radv_export_param(ctx, outinfo->vs_output_param_offset[slot_name], outputs[i].values, - usage_mask); - } -} - -/* Generate export instructions for hardware VS shader stage or NGG GS stage - * (position and parameter data only). 
- */ -static void -radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_values *outputs, - unsigned noutput, const struct radv_vs_output_info *outinfo, - bool export_clip_dists) -{ - LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_value = NULL; - LLVMValueRef primitive_shading_rate = NULL; - struct ac_export_args pos_args[4] = {0}; - unsigned pos_idx, index; - int i; - - /* Build position exports */ - for (i = 0; i < noutput; i++) { - switch (outputs[i].slot_name) { - case VARYING_SLOT_POS: - si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS, 0, &pos_args[0]); - break; - case VARYING_SLOT_PSIZ: - psize_value = outputs[i].values[0]; - break; - case VARYING_SLOT_LAYER: - layer_value = outputs[i].values[0]; - break; - case VARYING_SLOT_VIEWPORT: - viewport_value = outputs[i].values[0]; - break; - case VARYING_SLOT_PRIMITIVE_SHADING_RATE: - primitive_shading_rate = outputs[i].values[0]; - break; - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CLIP_DIST1: - index = 2 + outputs[i].slot_index; - si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS + index, 0, - &pos_args[index]); - break; - default: - break; - } - } - - /* We need to add the position output manually if it's missing. */ - if (!pos_args[0].out[0]) { - pos_args[0].enabled_channels = 0xf; /* writemask */ - pos_args[0].valid_mask = 0; /* EXEC mask */ - pos_args[0].done = 0; /* last export? */ - pos_args[0].target = V_008DFC_SQ_EXP_POS; - pos_args[0].compr = 0; /* COMPR flag */ - pos_args[0].out[0] = ctx->ac.f32_0; /* X */ - pos_args[0].out[1] = ctx->ac.f32_0; /* Y */ - pos_args[0].out[2] = ctx->ac.f32_0; /* Z */ - pos_args[0].out[3] = ctx->ac.f32_1; /* W */ - } - - /* Add clip distance outputs manually if they're missing. 
*/ - uint8_t clip_cull_mask = outinfo->clip_dist_mask | outinfo->cull_dist_mask; - for (i = 2; i < 4; i++) { - uint8_t mask = 0xf << (i * 4 - 8); - if ((clip_cull_mask & mask) && !pos_args[i].out[0]) { - pos_args[i].enabled_channels = 0x0; - pos_args[i].valid_mask = 0; - pos_args[i].done = 0; - pos_args[i].target = V_008DFC_SQ_EXP_POS + i; - pos_args[i].compr = 0; - pos_args[i].out[0] = ctx->ac.f32_0; - pos_args[i].out[1] = ctx->ac.f32_0; - pos_args[i].out[2] = ctx->ac.f32_0; - pos_args[i].out[3] = ctx->ac.f32_0; - } - } - - if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_layer || - outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate) { - pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) | - (outinfo->writes_primitive_shading_rate == true ? 2 : 0) | - (outinfo->writes_layer == true ? 4 : 0)); - pos_args[1].valid_mask = 0; - pos_args[1].done = 0; - pos_args[1].target = V_008DFC_SQ_EXP_POS + 1; - pos_args[1].compr = 0; - pos_args[1].out[0] = ctx->ac.f32_0; /* X */ - pos_args[1].out[1] = ctx->ac.f32_0; /* Y */ - pos_args[1].out[2] = ctx->ac.f32_0; /* Z */ - pos_args[1].out[3] = ctx->ac.f32_0; /* W */ - - if (outinfo->writes_pointsize == true) - pos_args[1].out[0] = psize_value; - if (outinfo->writes_layer == true) - pos_args[1].out[2] = layer_value; - if (outinfo->writes_viewport_index == true) { - if (ctx->options->gfx_level >= GFX9) { - /* GFX9 has the layer in out.z[10:0] and the viewport - * index in out.z[19:16]. 
- */ - LLVMValueRef v = viewport_value; - v = ac_to_integer(&ctx->ac, v); - v = LLVMBuildShl(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 16, false), ""); - v = LLVMBuildOr(ctx->ac.builder, v, ac_to_integer(&ctx->ac, pos_args[1].out[2]), ""); - - pos_args[1].out[2] = ac_to_float(&ctx->ac, v); - pos_args[1].enabled_channels |= 1 << 2; - } else { - pos_args[1].out[3] = viewport_value; - pos_args[1].enabled_channels |= 1 << 3; - } - } - - if (outinfo->writes_primitive_shading_rate) { - pos_args[1].out[1] = primitive_shading_rate; - } - } - - /* GFX10 skip POS0 exports if EXEC=0 and DONE=0, causing a hang. - * Setting valid_mask=1 prevents it and has no other effect. - */ - if (ctx->ac.gfx_level == GFX10) - pos_args[0].valid_mask = 1; - - pos_idx = 0; - for (i = 0; i < 4; i++) { - if (!pos_args[i].out[0]) - continue; - - /* Specify the target we are exporting */ - pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++; - - if (pos_idx == outinfo->pos_exports) - /* Specify that this is the last export */ - pos_args[i].done = 1; - - ac_build_export(&ctx->ac, &pos_args[i]); - } - - if (ctx->options->gfx_level >= GFX11) - return; - - /* Build parameter exports */ - radv_build_param_exports(ctx, outputs, noutput, outinfo, export_clip_dists); -} - -static void -radv_llvm_visit_export_vertex(struct ac_shader_abi *abi) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - const struct radv_vs_output_info *outinfo = &ctx->shader_info->outinfo; - const bool export_clip_dists = outinfo->export_clip_dists; - struct radv_shader_output_values *outputs; - unsigned noutput = 0; - - /* Allocate a temporary array for the output values. 
*/ - unsigned num_outputs = util_bitcount64(ctx->output_mask); - outputs = malloc(num_outputs * sizeof(outputs[0])); - - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - if (!(ctx->output_mask & (1ull << i))) - continue; - - outputs[noutput].slot_name = i; - outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1; - - if (ctx->stage == MESA_SHADER_VERTEX) { - outputs[noutput].usage_mask = ctx->shader_info->vs.output_usage_mask[i]; - } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { - outputs[noutput].usage_mask = ctx->shader_info->tes.output_usage_mask[i]; - } else if (ctx->stage == MESA_SHADER_GEOMETRY) { - outputs[noutput].usage_mask = ctx->shader_info->gs.output_usage_mask[i]; - } - - for (unsigned j = 0; j < 4; j++) { - outputs[noutput].values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); - } - - noutput++; - } - - radv_llvm_export_vs(ctx, outputs, noutput, outinfo, export_clip_dists); - - free(outputs); -} - static bool si_export_mrt_color(struct radv_shader_context *ctx, LLVMValueRef *color, unsigned target, unsigned index, struct ac_export_args *args) @@ -1245,8 +1023,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ac_nir_fixup_ls_hs_input_vgprs(&ctx); if (is_ngg) { - ctx.abi.export_vertex = radv_llvm_visit_export_vertex; - if (!info->is_ngg_passthrough) declare_esgs_ring(&ctx); @@ -1280,10 +1056,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && !ctx.shader_info->is_ngg) { ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter; ctx.abi.emit_primitive = visit_end_primitive; - } else if (shaders[shader_idx]->info.stage == MESA_SHADER_TESS_EVAL) { - ctx.abi.export_vertex = radv_llvm_visit_export_vertex; } else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX) { - ctx.abi.export_vertex = radv_llvm_visit_export_vertex; ctx.abi.load_inputs = radv_load_vs_inputs; } diff --git a/src/compiler/nir/nir_intrinsics.py 
b/src/compiler/nir/nir_intrinsics.py index e5b989c289f..f5a923f198f 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1423,8 +1423,6 @@ intrinsic("load_cull_any_enabled_amd", dest_comp=1, bit_sizes=[1], flags=[CAN_EL intrinsic("load_cull_small_prim_precision_amd", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER]) # Initial edge flags in a Vertex Shader, packed into the format the HW needs for primitive export. intrinsic("load_initial_edgeflags_amd", src_comp=[], dest_comp=1, bit_sizes=[32], indices=[]) -# Exports the current invocation's vertex. This is a placeholder where all vertex attribute export instructions should be emitted. -intrinsic("export_vertex_amd", src_comp=[], indices=[]) # Allocates export space for vertices and primitives. src[] = {num_vertices, num_primitives}. intrinsic("alloc_vertices_and_primitives_amd", src_comp=[1, 1], indices=[]) # Overwrites VS input registers, for use with vertex compaction after culling. src = {vertex_id, instance_id}. 
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index d5066476f07..77dca9e5a20 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -30,12 +30,6 @@ struct util_debug_callback; -struct si_shader_output_values { - LLVMValueRef values[4]; - ubyte vertex_streams; - ubyte semantic; -}; - struct si_shader_args { struct ac_shader_args ac; @@ -257,10 +251,6 @@ void si_llvm_ps_build_end(struct si_shader_context *ctx); void si_llvm_init_ps_callbacks(struct si_shader_context *ctx); /* si_shader_llvm_vs.c */ -void si_llvm_clipvertex_to_clipdist(struct si_shader_context *ctx, - struct ac_export_args clipdist[2], LLVMValueRef clipvertex[4]); -void si_llvm_build_vs_exports(struct si_shader_context *ctx, - struct si_shader_output_values *outputs, unsigned noutput); void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key); void si_llvm_init_vs_callbacks(struct si_shader_context *ctx); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index d809d216c91..3c199e58b7f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -774,35 +774,6 @@ static LLVMValueRef si_llvm_load_sampler_desc(struct ac_shader_abi *abi, LLVMVal return index; } -static void si_llvm_export_vertex(struct ac_shader_abi *abi) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - struct si_shader_info *info = &ctx->shader->selector->info; - struct si_shader_output_values outputs[PIPE_MAX_SHADER_OUTPUTS]; - LLVMValueRef *addrs = ctx->abi.outputs; - - unsigned num_outputs = info->num_outputs; - /* if needed, nir lower will append primitive id export at last */ - if (ctx->shader->key.ge.mono.u.vs_export_prim_id) - num_outputs++; - - for (unsigned i = 0; i < num_outputs; i++) { - if (i < info->num_outputs) { - 
outputs[i].semantic = info->output_semantic[i]; - outputs[i].vertex_streams = info->output_streams[i]; - } else { - outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID; - outputs[i].vertex_streams = 0; - } - - for (unsigned j = 0; j < 4; j++) - outputs[i].values[j] = - LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + j], ""); - } - - si_llvm_build_vs_exports(ctx, outputs, num_outputs); -} - bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader, struct nir_shader *nir, bool free_nir) { @@ -819,7 +790,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad ctx->num_images = info->base.num_images; ctx->abi.intrinsic_load = si_llvm_load_intrinsic; - ctx->abi.export_vertex = si_llvm_export_vertex; ctx->abi.load_sampler_desc = si_llvm_load_sampler_desc; si_llvm_create_main_func(ctx); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 5d04369a2e2..75e6551529e 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -330,282 +330,6 @@ static LLVMValueRef si_load_vs_input(struct ac_shader_abi *abi, unsigned driver_ return ac_build_varying_gather_values(&ctx->ac, values, num_components, component); } -void si_llvm_clipvertex_to_clipdist(struct si_shader_context *ctx, - struct ac_export_args clipdist[2], LLVMValueRef clipvertex[4]) -{ - unsigned reg_index; - unsigned chan; - unsigned const_chan; - LLVMValueRef base_elt; - LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0); - LLVMValueRef const_resource = ac_build_load_to_sgpr( - &ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings), constbuf_index); - unsigned clipdist_mask = ctx->shader->selector->info.clipdist_mask & - ~ctx->shader->key.ge.opt.kill_clip_distances; - - for (reg_index = 0; reg_index < 2; reg_index++) { - struct ac_export_args *args = &clipdist[reg_index]; - 
- if (!(clipdist_mask & BITFIELD_RANGE(reg_index * 4, 4))) - continue; - - args->out[0] = args->out[1] = args->out[2] = args->out[3] = LLVMGetUndef(ctx->ac.f32); - - /* Compute dot products of position and user clip plane vectors */ - for (chan = 0; chan < 4; chan++) { - if (!(clipdist_mask & BITFIELD_BIT(reg_index * 4 + chan))) - continue; - - for (const_chan = 0; const_chan < 4; const_chan++) { - LLVMValueRef addr = - LLVMConstInt(ctx->ac.i32, ((reg_index * 4 + chan) * 4 + const_chan) * 4, 0); - base_elt = si_buffer_load_const(ctx, const_resource, addr); - args->out[chan] = - ac_build_fmad(&ctx->ac, base_elt, clipvertex[const_chan], - const_chan == 0 ? ctx->ac.f32_0 : args->out[chan]); - } - } - - args->enabled_channels = 0xf; - args->valid_mask = 0; - args->done = 0; - args->target = V_008DFC_SQ_EXP_POS + 2 + reg_index; - args->compr = 0; - } -} - -/* Initialize arguments for the shader export intrinsic */ -static void si_llvm_init_vs_export_args(struct si_shader_context *ctx, const LLVMValueRef *values, - unsigned target, struct ac_export_args *args) -{ - args->enabled_channels = 0xf; /* writemask - default is 0xf */ - args->valid_mask = 0; /* Specify whether the EXEC mask represents the valid mask */ - args->done = 0; /* Specify whether this is the last export */ - args->target = target; /* Specify the target we are exporting */ - args->compr = false; - - memcpy(&args->out[0], values, sizeof(values[0]) * 4); -} - -/** - * Generate export instructions for hardware VS shader stage or NGG GS stage - * (position and parameter data only). 
- */ -void si_llvm_build_vs_exports(struct si_shader_context *ctx, - struct si_shader_output_values *outputs, unsigned noutput) -{ - struct si_shader *shader = ctx->shader; - struct ac_export_args pos_args[4] = {}; - LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, - viewport_index_value = NULL; - unsigned pos_idx, index; - unsigned clipdist_mask = (shader->selector->info.clipdist_mask & - ~shader->key.ge.opt.kill_clip_distances) | - shader->selector->info.culldist_mask; - int i; - - /* Build position exports. */ - for (i = 0; i < noutput; i++) { - switch (outputs[i].semantic) { - case VARYING_SLOT_POS: - si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS, &pos_args[0]); - break; - case VARYING_SLOT_PSIZ: - psize_value = outputs[i].values[0]; - break; - case VARYING_SLOT_LAYER: - layer_value = outputs[i].values[0]; - break; - case VARYING_SLOT_VIEWPORT: - viewport_index_value = outputs[i].values[0]; - break; - case VARYING_SLOT_EDGE: - edgeflag_value = outputs[i].values[0]; - break; - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CLIP_DIST1: - index = outputs[i].semantic - VARYING_SLOT_CLIP_DIST0; - if (clipdist_mask & BITFIELD_RANGE(index * 4, 4)) { - si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS + 2 + index, - &pos_args[2 + index]); - } - break; - case VARYING_SLOT_CLIP_VERTEX: - si_llvm_clipvertex_to_clipdist(ctx, pos_args + 2, outputs[i].values); - break; - } - } - - /* We need to add the position output manually if it's missing. */ - if (!pos_args[0].out[0]) { - pos_args[0].enabled_channels = 0xf; /* writemask */ - pos_args[0].valid_mask = 0; /* EXEC mask */ - pos_args[0].done = 0; /* last export? 
*/ - pos_args[0].target = V_008DFC_SQ_EXP_POS; - pos_args[0].compr = 0; /* COMPR flag */ - pos_args[0].out[0] = ctx->ac.f32_0; /* X */ - pos_args[0].out[1] = ctx->ac.f32_0; /* Y */ - pos_args[0].out[2] = ctx->ac.f32_0; /* Z */ - pos_args[0].out[3] = ctx->ac.f32_1; /* W */ - } - - bool writes_psize = shader->selector->info.writes_psize && !shader->key.ge.opt.kill_pointsize; - bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.ge.as_ngg; - bool writes_vrs = ctx->screen->options.vrs2x2; - - /* Write the misc vector (point size, edgeflag, layer, viewport). */ - if (writes_psize || pos_writes_edgeflag || writes_vrs || - shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) { - pos_args[1].enabled_channels = writes_psize | - ((pos_writes_edgeflag | writes_vrs) << 1) | - (shader->selector->info.writes_layer << 2); - - pos_args[1].valid_mask = 0; /* EXEC mask */ - pos_args[1].done = 0; /* last export? */ - pos_args[1].target = V_008DFC_SQ_EXP_POS + 1; - pos_args[1].compr = 0; /* COMPR flag */ - pos_args[1].out[0] = ctx->ac.f32_0; /* X */ - pos_args[1].out[1] = ctx->ac.f32_0; /* Y */ - pos_args[1].out[2] = ctx->ac.f32_0; /* Z */ - pos_args[1].out[3] = ctx->ac.f32_0; /* W */ - - if (writes_psize) - pos_args[1].out[0] = psize_value; - - if (pos_writes_edgeflag) { - /* The output is a float, but the hw expects an integer - * with the first bit containing the edge flag. */ - edgeflag_value = LLVMBuildFPToUI(ctx->ac.builder, edgeflag_value, ctx->ac.i32, ""); - edgeflag_value = ac_build_umin(&ctx->ac, edgeflag_value, ctx->ac.i32_1); - - /* The LLVM intrinsic expects a float. */ - pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value); - } - - if (writes_vrs) { - LLVMValueRef rates; - if (ctx->screen->info.gfx_level >= GFX11) { - /* Bits [2:5] = VRS rate - * - * The range is [0, 15]. - * - * If the hw doesn't support VRS 4x4, it will silently use 2x2 instead. 
- */ - rates = LLVMConstInt(ctx->ac.i32, (V_0283D0_VRS_SHADING_RATE_4X4 << 2), 0); - } else { - /* Bits [2:3] = VRS rate X - * Bits [4:5] = VRS rate Y - * - * The range is [-2, 1]. Values: - * 1: 2x coarser shading rate in that direction. - * 0: normal shading rate - * -1: 2x finer shading rate (sample shading, not directional) - * -2: 4x finer shading rate (sample shading, not directional) - * - * Sample shading can't go above 8 samples, so both numbers can't be -2 - * at the same time. - */ - rates = LLVMConstInt(ctx->ac.i32, (1 << 2) | (1 << 4), 0); - } - - /* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */ - rates = LLVMBuildSelect(ctx->ac.builder, - LLVMBuildFCmp(ctx->ac.builder, LLVMRealUNE, - pos_args[0].out[3], ctx->ac.f32_1, ""), - rates, ctx->ac.i32_0, ""); - - LLVMValueRef v = ac_to_integer(&ctx->ac, pos_args[1].out[1]); - v = LLVMBuildOr(ctx->ac.builder, v, rates, ""); - pos_args[1].out[1] = ac_to_float(&ctx->ac, v); - } - - if (ctx->screen->info.gfx_level >= GFX9) { - /* GFX9 has the layer in out.z[10:0] and the viewport - * index in out.z[19:16]. - */ - if (shader->selector->info.writes_layer) - pos_args[1].out[2] = layer_value; - - if (shader->selector->info.writes_viewport_index) { - LLVMValueRef v = viewport_index_value; - - v = ac_to_integer(&ctx->ac, v); - v = LLVMBuildShl(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 16, 0), ""); - v = LLVMBuildOr(ctx->ac.builder, v, ac_to_integer(&ctx->ac, pos_args[1].out[2]), ""); - pos_args[1].out[2] = ac_to_float(&ctx->ac, v); - pos_args[1].enabled_channels |= 1 << 2; - } - } else { - if (shader->selector->info.writes_layer) - pos_args[1].out[2] = layer_value; - - if (shader->selector->info.writes_viewport_index) { - pos_args[1].out[3] = viewport_index_value; - pos_args[1].enabled_channels |= 1 << 3; - } - } - } - - /* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang. - * Setting valid_mask=1 prevents it and has no other effect. 
- */ - if (ctx->screen->info.gfx_level == GFX10) - pos_args[0].valid_mask = 1; - - pos_idx = 0; - for (i = 0; i < 4; i++) { - if (!pos_args[i].out[0]) - continue; - - /* Specify the target we are exporting */ - pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++; - - if (pos_idx == shader->info.nr_pos_exports) { - /* Specify that this is the last export */ - pos_args[i].done = 1; - - /* If a shader has no param exports, rasterization can start before - * the shader finishes and thus memory stores might not finish before - * the pixel shader starts. - * - * VLOAD is for atomics with return. - */ - if (ctx->screen->info.gfx_level >= GFX10 && - !shader->info.nr_param_exports && - shader->selector->info.base.writes_memory) - ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE); - } - - ac_build_export(&ctx->ac, &pos_args[i]); - } - - if (!shader->info.nr_param_exports || - /* GFX11 param export is handled in nir */ - ctx->screen->info.gfx_level >= GFX11) - return; - - /* Build parameter exports. Use 2 loops to export params in ascending order. - * 32 is the maximum number of parameter exports. - */ - struct ac_export_args param_exports[32] = {}; - uint64_t vs_output_param_mask = shader->info.vs_output_param_mask; - - while (vs_output_param_mask) { - unsigned i = u_bit_scan64(&vs_output_param_mask); - unsigned offset = shader->info.vs_output_param_offset[outputs[i].semantic]; - - assert(offset <= AC_EXP_PARAM_OFFSET_31); - assert(!param_exports[offset].enabled_channels); - - si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_PARAM + offset, - &param_exports[offset]); - } - - /* Export attributes using parameter exports. */ - for (unsigned i = 0; i < shader->info.nr_param_exports; i++) - ac_build_export(&ctx->ac, &param_exports[i]); -} - /** * Build the vertex shader prolog function. *