diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 3b638ceb5a7..379c88f2b2d 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3616,11 +3616,14 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins } case nir_intrinsic_load_base_vertex: case nir_intrinsic_load_first_vertex: - result = ctx->abi->load_base_vertex(ctx->abi, - instr->intrinsic == nir_intrinsic_load_base_vertex); - break; case nir_intrinsic_load_workgroup_size: - result = ctx->abi->load_local_group_size(ctx->abi); + case nir_intrinsic_load_tess_level_outer: + case nir_intrinsic_load_tess_level_inner: + case nir_intrinsic_load_tess_level_outer_default: + case nir_intrinsic_load_tess_level_inner_default: + case nir_intrinsic_load_patch_vertices_in: + case nir_intrinsic_load_sample_mask_in: + result = ctx->abi->intrinsic_load(ctx->abi, instr->intrinsic); break; case nir_intrinsic_load_vertex_id: result = LLVMBuildAdd(ctx->ac.builder, @@ -3687,9 +3690,6 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins case nir_intrinsic_load_sample_pos: result = load_sample_pos(ctx); break; - case nir_intrinsic_load_sample_mask_in: - result = ctx->abi->load_sample_mask_in(ctx->abi); - break; case nir_intrinsic_load_frag_coord: result = emit_load_frag_coord(ctx); break; @@ -4031,21 +4031,6 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins result = ac_build_gather_values(&ctx->ac, coord, 3); break; } - case nir_intrinsic_load_tess_level_outer: - result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false); - break; - case nir_intrinsic_load_tess_level_inner: - result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false); - break; - case nir_intrinsic_load_tess_level_outer_default: - result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true); - break; - case nir_intrinsic_load_tess_level_inner_default: - result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true); - break; - case nir_intrinsic_load_patch_vertices_in: - result = ctx->abi->load_patch_vertices_in(ctx->abi); - break; case nir_intrinsic_vote_all: { result = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0])); break; diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index b7406b92b70..0c370ffbc9b 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -27,6 +27,7 @@ #include "ac_shader_args.h" #include "ac_shader_util.h" #include "compiler/shader_enums.h" +#include "nir.h" #include #include @@ -74,10 +75,6 @@ struct ac_shader_abi { LLVMValueRef src, unsigned writemask, unsigned component, unsigned location, unsigned driver_location); - LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi); - - LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi, unsigned varying_id, - bool load_default_state); LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index); @@ -109,14 +106,10 @@ struct ac_shader_abi { LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi, LLVMValueRef sample_id); - LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi); - - LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi); - - LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi, bool non_indexed_is_zero); - LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi); + LLVMValueRef (*intrinsic_load)(struct ac_shader_abi *abi, nir_intrinsic_op op); + /* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi currently * uses it due to promoting D16 to D32, but radv needs it off. */ bool clamp_shadow_reference; diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index ecfd4f94f36..6069f573e40 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2012,6 +2012,17 @@ declare_esgs_ring(struct radv_shader_context *ctx) LLVMSetAlignment(ctx->esgs_ring, 64 * 1024); } +static LLVMValueRef radv_intrinsic_load(struct ac_shader_abi *abi, nir_intrinsic_op op) +{ + switch (op) { + case nir_intrinsic_load_base_vertex: + case nir_intrinsic_load_first_vertex: + return radv_load_base_vertex(abi, op == nir_intrinsic_load_base_vertex); + default: + return NULL; + } +} + static LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, const struct radv_nir_compiler_options *options, @@ -2044,6 +2055,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2); + ctx.abi.intrinsic_load = radv_intrinsic_load; ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter; ctx.abi.load_ubo = radv_load_ubo; ctx.abi.load_ssbo = radv_load_ssbo; @@ -2113,7 +2125,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.abi.emit_primitive = visit_end_primitive; } else if (shaders[shader_idx]->info.stage == MESA_SHADER_TESS_EVAL) { } else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX) { - ctx.abi.load_base_vertex = radv_load_base_vertex; ctx.abi.load_inputs = radv_load_vs_inputs; } else if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT) { ctx.abi.load_sample_position = load_sample_position; diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index 8de6c8caacd..b6eb7574ef3 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -686,6 +686,12 @@ void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir, info->output_usagemask[info->num_outputs] = 0x1; } + if (nir->info.stage == MESA_SHADER_TESS_EVAL) { + /* This is a hack to simplify loading tess levels in TES. */ + info->input[info->num_inputs].semantic = VARYING_SLOT_TESS_LEVEL_OUTER; + info->input[info->num_inputs + 1].semantic = VARYING_SLOT_TESS_LEVEL_INNER; + } + if (nir->info.stage == MESA_SHADER_FRAGMENT) { info->allow_flat_shading = !(info->uses_persp_center || info->uses_persp_centroid || info->uses_persp_sample || info->uses_linear_center || diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 1e1f1b52f93..b52187fc491 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -244,6 +244,7 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx); void si_llvm_init_gs_callbacks(struct si_shader_context *ctx); /* si_shader_llvm_tess.c */ +LLVMValueRef si_get_num_tcs_out_vertices(struct si_shader_context *ctx); void si_llvm_preload_tes_rings(struct si_shader_context *ctx); void si_llvm_ls_build_end(struct si_shader_context *ctx); void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 64703706fd6..f9d6cb980a3 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -396,21 +396,6 @@ LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle } } -static LLVMValueRef si_llvm_get_block_size(struct ac_shader_abi *abi) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - - assert(ctx->shader->selector->info.base.workgroup_size_variable && - ctx->shader->selector->info.uses_variable_block_size); - - LLVMValueRef chan[3] = { - si_unpack_param(ctx, ctx->block_size, 0, 10), - si_unpack_param(ctx, ctx->block_size, 10, 10), - si_unpack_param(ctx, ctx->block_size, 20, 10), - }; - return ac_build_gather_values(&ctx->ac, chan, 3); -} - static void si_llvm_declare_compute_memory(struct si_shader_context *ctx) { struct si_shader_selector *sel = ctx->shader->selector; @@ -726,6 +711,72 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part LLVMBuildRet(builder, ret); } +static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrinsic_op op) +{ + struct si_shader_context *ctx = si_shader_context_from_abi(abi); + const struct si_shader_info *info = &ctx->shader->selector->info; + + switch (op) { + case nir_intrinsic_load_first_vertex: + return ac_get_arg(&ctx->ac, ctx->args.base_vertex); + + case nir_intrinsic_load_base_vertex: { + /* For non-indexed draws, the base vertex set by the driver + * (for direct draws) or the CP (for indirect draws) is the + * first vertex ID, but GLSL expects 0 to be returned. + */ + LLVMValueRef indexed = si_unpack_param(ctx, ctx->vs_state_bits, 1, 1); + indexed = LLVMBuildTrunc(ctx->ac.builder, indexed, ctx->ac.i1, ""); + return LLVMBuildSelect(ctx->ac.builder, indexed, ac_get_arg(&ctx->ac, ctx->args.base_vertex), + ctx->ac.i32_0, ""); + } + + case nir_intrinsic_load_workgroup_size: { + assert(ctx->shader->selector->info.base.workgroup_size_variable && + ctx->shader->selector->info.uses_variable_block_size); + LLVMValueRef chan[3] = { + si_unpack_param(ctx, ctx->block_size, 0, 10), + si_unpack_param(ctx, ctx->block_size, 10, 10), + si_unpack_param(ctx, ctx->block_size, 20, 10), + }; + return ac_build_gather_values(&ctx->ac, chan, 3); + } + + case nir_intrinsic_load_tess_level_outer: + return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs, 0, 4, true, false); + + case nir_intrinsic_load_tess_level_inner: + return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs + 1, 0, 4, true, false); + + case nir_intrinsic_load_tess_level_outer_default: + case nir_intrinsic_load_tess_level_inner_default: { + LLVMValueRef slot = LLVMConstInt(ctx->ac.i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0); + LLVMValueRef buf = ac_get_arg(&ctx->ac, ctx->internal_bindings); + buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot); + int offset = op == nir_intrinsic_load_tess_level_inner_default ? 4 : 0; + LLVMValueRef val[4]; + + for (int i = 0; i < 4; i++) + val[i] = si_buffer_load_const(ctx, buf, LLVMConstInt(ctx->ac.i32, (offset + i) * 4, 0)); + return ac_build_gather_values(&ctx->ac, val, 4); + } + + case nir_intrinsic_load_patch_vertices_in: + if (ctx->stage == MESA_SHADER_TESS_CTRL) + return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6); + else if (ctx->stage == MESA_SHADER_TESS_EVAL) + return si_get_num_tcs_out_vertices(ctx); + else + return NULL; + + case nir_intrinsic_load_sample_mask_in: + return ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.sample_coverage)); + + default: + return NULL; + } +} + bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader, struct nir_shader *nir, bool free_nir, bool ngg_cull_shader) { @@ -741,6 +792,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad ctx->num_samplers = BITSET_LAST_BIT(info->base.textures_used); ctx->num_images = info->base.num_images; + ctx->abi.intrinsic_load = si_llvm_load_intrinsic; + si_llvm_init_resource_callbacks(ctx); si_llvm_create_main_func(ctx, ngg_cull_shader); @@ -839,8 +892,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad } case MESA_SHADER_COMPUTE: - ctx->abi.load_local_group_size = si_llvm_get_block_size; - if (nir->info.cs.user_data_components_amd) { ctx->abi.user_data = ac_get_arg(&ctx->ac, ctx->cs_user_data); ctx->abi.user_data = ac_build_expand_to_vec4(&ctx->ac, ctx->abi.user_data, diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c index 7ab160c3bc4..79a32a2774f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c @@ -31,12 +31,6 @@ LLVMValueRef si_get_sample_id(struct si_shader_context *ctx) return si_unpack_param(ctx, ctx->args.ancillary, 8, 4); } -static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - return ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.sample_coverage)); -} - static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); @@ -987,6 +981,5 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader void si_llvm_init_ps_callbacks(struct si_shader_context *ctx) { ctx->abi.load_sample_position = load_sample_position; - ctx->abi.load_sample_mask_in = load_sample_mask_in; ctx->abi.emit_fbfetch = si_nir_emit_fbfetch; } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 46e7ab62e42..925b9e156a0 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -135,7 +135,7 @@ static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_conte return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset); } -static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx) +LLVMValueRef si_get_num_tcs_out_vertices(struct si_shader_context *ctx) { unsigned tcs_out_vertices = ctx->shader->selector ? ctx->shader->selector->info.base.tess.tcs_vertices_out @@ -219,7 +219,7 @@ static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx, LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices; LLVMValueRef param_stride, constant16; - vertices_per_patch = get_num_tcs_out_vertices(ctx); + vertices_per_patch = si_get_num_tcs_out_vertices(ctx); num_patches = si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6); num_patches = LLVMBuildAdd(ctx->ac.builder, num_patches, ctx->ac.i32_1, ""); total_vertices = LLVMBuildMul(ctx->ac.builder, vertices_per_patch, num_patches, ""); @@ -563,79 +563,6 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, } } -static LLVMValueRef load_tess_level(struct si_shader_context *ctx, unsigned semantic) -{ - LLVMValueRef base, addr; - - int param = si_shader_io_get_unique_index_patch(semantic); - - base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset); - addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL, - LLVMConstInt(ctx->ac.i32, param, 0)); - - return buffer_load(ctx, ctx->ac.f32, ~0, ctx->tess_offchip_ring, base, addr, true); -} - -static LLVMValueRef load_tess_level_default(struct si_shader_context *ctx, unsigned sysval) -{ - LLVMValueRef buf, slot, val[4]; - int i, offset; - - slot = LLVMConstInt(ctx->ac.i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0); - buf = ac_get_arg(&ctx->ac, ctx->internal_bindings); - buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot); - offset = sysval == SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT ? 4 : 0; - - for (i = 0; i < 4; i++) - val[i] = si_buffer_load_const(ctx, buf, LLVMConstInt(ctx->ac.i32, (offset + i) * 4, 0)); - return ac_build_gather_values(&ctx->ac, val, 4); -} - -static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi, unsigned varying_id, - bool load_default_state) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - unsigned semantic; - - if (load_default_state) { - switch (varying_id) { - case VARYING_SLOT_TESS_LEVEL_INNER: - semantic = SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT; - break; - case VARYING_SLOT_TESS_LEVEL_OUTER: - semantic = SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT; - break; - default: - unreachable("unknown tess level"); - } - return load_tess_level_default(ctx, semantic); - } - - switch (varying_id) { - case VARYING_SLOT_TESS_LEVEL_INNER: - semantic = VARYING_SLOT_TESS_LEVEL_INNER; - break; - case VARYING_SLOT_TESS_LEVEL_OUTER: - semantic = VARYING_SLOT_TESS_LEVEL_OUTER; - break; - default: - unreachable("unknown tess level"); - } - - return load_tess_level(ctx, semantic); -} - -static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - if (ctx->stage == MESA_SHADER_TESS_CTRL) - return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6); - else if (ctx->stage == MESA_SHADER_TESS_EVAL) - return get_num_tcs_out_vertices(ctx); - else - unreachable("invalid shader stage for VERTICESIN"); -} - /** * Forward all outputs from the vertex shader to the TES. This is only used * for the fixed function TCS. @@ -1086,14 +1013,10 @@ void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_par void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx) { ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings; - ctx->abi.load_tess_level = si_load_tess_level; ctx->abi.store_tcs_outputs = si_nir_store_output_tcs; - ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in; } void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader) { ctx->abi.load_tess_varyings = si_nir_load_input_tes; - ctx->abi.load_tess_level = si_load_tess_level; - ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in; } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 1e5a10c20f8..103fb64356a 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -986,27 +986,7 @@ void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part si_llvm_build_ret(ctx, ret); } -static LLVMValueRef get_base_vertex(struct ac_shader_abi *abi, bool non_indexed_is_zero) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - - /* This doesn't happen with GL: */ - if (!non_indexed_is_zero) - return ac_get_arg(&ctx->ac, ctx->args.base_vertex); - - /* For non-indexed draws, the base vertex set by the driver - * (for direct draws) or the CP (for indirect draws) is the - * first vertex ID, but GLSL expects 0 to be returned. - */ - LLVMValueRef indexed = si_unpack_param(ctx, ctx->vs_state_bits, 1, 1); - indexed = LLVMBuildTrunc(ctx->ac.builder, indexed, ctx->ac.i1, ""); - - return LLVMBuildSelect(ctx->ac.builder, indexed, ac_get_arg(&ctx->ac, ctx->args.base_vertex), - ctx->ac.i32_0, ""); -} - void si_llvm_init_vs_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader) { - ctx->abi.load_base_vertex = get_base_vertex; ctx->abi.load_inputs = si_load_vs_input; }