ac: replace 5 ac_shader_abi::load_* callbacks with 1 intrinsic_load callback

This merges them into si_llvm_load_intrinsic and reuses load_tess_varyings.

RADV only implemented 1 callback.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16467>
This commit is contained in:
Marek Olšák
2022-05-05 22:16:27 -04:00
parent 535d954914
commit 29965f356b
9 changed files with 99 additions and 156 deletions

View File

@@ -3616,11 +3616,14 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
}
case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_first_vertex:
result = ctx->abi->load_base_vertex(ctx->abi,
instr->intrinsic == nir_intrinsic_load_base_vertex);
break;
case nir_intrinsic_load_workgroup_size:
result = ctx->abi->load_local_group_size(ctx->abi);
case nir_intrinsic_load_tess_level_outer:
case nir_intrinsic_load_tess_level_inner:
case nir_intrinsic_load_tess_level_outer_default:
case nir_intrinsic_load_tess_level_inner_default:
case nir_intrinsic_load_patch_vertices_in:
case nir_intrinsic_load_sample_mask_in:
result = ctx->abi->intrinsic_load(ctx->abi, instr->intrinsic);
break;
case nir_intrinsic_load_vertex_id:
result = LLVMBuildAdd(ctx->ac.builder,
@@ -3687,9 +3690,6 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
case nir_intrinsic_load_sample_pos:
result = load_sample_pos(ctx);
break;
case nir_intrinsic_load_sample_mask_in:
result = ctx->abi->load_sample_mask_in(ctx->abi);
break;
case nir_intrinsic_load_frag_coord:
result = emit_load_frag_coord(ctx);
break;
@@ -4031,21 +4031,6 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
result = ac_build_gather_values(&ctx->ac, coord, 3);
break;
}
case nir_intrinsic_load_tess_level_outer:
result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
break;
case nir_intrinsic_load_tess_level_inner:
result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
break;
case nir_intrinsic_load_tess_level_outer_default:
result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
break;
case nir_intrinsic_load_tess_level_inner_default:
result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
break;
case nir_intrinsic_load_patch_vertices_in:
result = ctx->abi->load_patch_vertices_in(ctx->abi);
break;
case nir_intrinsic_vote_all: {
result = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
break;

View File

@@ -27,6 +27,7 @@
#include "ac_shader_args.h"
#include "ac_shader_util.h"
#include "compiler/shader_enums.h"
#include "nir.h"
#include <llvm-c/Core.h>
#include <assert.h>
@@ -74,10 +75,6 @@ struct ac_shader_abi {
LLVMValueRef src, unsigned writemask,
unsigned component, unsigned location, unsigned driver_location);
LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi);
LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi, unsigned varying_id,
bool load_default_state);
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
@@ -109,14 +106,10 @@ struct ac_shader_abi {
LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi, LLVMValueRef sample_id);
LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi);
LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi);
LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi, bool non_indexed_is_zero);
LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi);
LLVMValueRef (*intrinsic_load)(struct ac_shader_abi *abi, nir_intrinsic_op op);
/* Whether to clamp the shadow reference value to [0,1] on GFX8. Radeonsi currently
* uses it due to promoting D16 to D32, but radv needs it off. */
bool clamp_shadow_reference;

View File

@@ -2012,6 +2012,17 @@ declare_esgs_ring(struct radv_shader_context *ctx)
LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
}
/* RADV's ac_shader_abi::intrinsic_load callback.
 *
 * Only the base/first-vertex loads are handled here; any other op returns
 * NULL so the caller can apply its own lowering (presumably — confirm
 * against the common ac_nir code).
 */
static LLVMValueRef radv_intrinsic_load(struct ac_shader_abi *abi, nir_intrinsic_op op)
{
   const bool is_base_vertex = op == nir_intrinsic_load_base_vertex;

   if (is_base_vertex || op == nir_intrinsic_load_first_vertex)
      return radv_load_base_vertex(abi, is_base_vertex);

   return NULL;
}
static LLVMModuleRef
ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
const struct radv_nir_compiler_options *options,
@@ -2044,6 +2055,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2);
ctx.abi.intrinsic_load = radv_intrinsic_load;
ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
ctx.abi.load_ubo = radv_load_ubo;
ctx.abi.load_ssbo = radv_load_ssbo;
@@ -2113,7 +2125,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
ctx.abi.emit_primitive = visit_end_primitive;
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_TESS_EVAL) {
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX) {
ctx.abi.load_base_vertex = radv_load_base_vertex;
ctx.abi.load_inputs = radv_load_vs_inputs;
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT) {
ctx.abi.load_sample_position = load_sample_position;

View File

@@ -686,6 +686,12 @@ void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
info->output_usagemask[info->num_outputs] = 0x1;
}
if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
/* This is a hack to simplify loading tess levels in TES. */
info->input[info->num_inputs].semantic = VARYING_SLOT_TESS_LEVEL_OUTER;
info->input[info->num_inputs + 1].semantic = VARYING_SLOT_TESS_LEVEL_INNER;
}
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
info->allow_flat_shading = !(info->uses_persp_center || info->uses_persp_centroid ||
info->uses_persp_sample || info->uses_linear_center ||

View File

@@ -244,6 +244,7 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx);
void si_llvm_init_gs_callbacks(struct si_shader_context *ctx);
/* si_shader_llvm_tess.c */
LLVMValueRef si_get_num_tcs_out_vertices(struct si_shader_context *ctx);
void si_llvm_preload_tes_rings(struct si_shader_context *ctx);
void si_llvm_ls_build_end(struct si_shader_context *ctx);
void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key);

View File

@@ -396,21 +396,6 @@ LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle
}
}
/* Return the variable workgroup size as a 3-component vector, unpacked from
 * the block_size shader argument (10 bits per dimension).
 *
 * Only valid when the shader was compiled with a variable workgroup size
 * (enforced by the assert below).
 */
static LLVMValueRef si_llvm_get_block_size(struct ac_shader_abi *abi)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);

   assert(ctx->shader->selector->info.base.workgroup_size_variable &&
          ctx->shader->selector->info.uses_variable_block_size);

   LLVMValueRef size[3];
   for (unsigned i = 0; i < 3; i++)
      size[i] = si_unpack_param(ctx, ctx->block_size, i * 10, 10);

   return ac_build_gather_values(&ctx->ac, size, 3);
}
static void si_llvm_declare_compute_memory(struct si_shader_context *ctx)
{
struct si_shader_selector *sel = ctx->shader->selector;
@@ -726,6 +711,72 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
LLVMBuildRet(builder, ret);
}
/* radeonsi's unified ac_shader_abi::intrinsic_load callback.
 *
 * Lowers the system-value load intrinsics previously served by separate
 * ABI callbacks (base vertex, workgroup size, tess levels, patch vertices,
 * sample mask). Returns NULL for any op not handled here.
 */
static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrinsic_op op)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
const struct si_shader_info *info = &ctx->shader->selector->info;
switch (op) {
case nir_intrinsic_load_first_vertex:
return ac_get_arg(&ctx->ac, ctx->args.base_vertex);
case nir_intrinsic_load_base_vertex: {
/* For non-indexed draws, the base vertex set by the driver
 * (for direct draws) or the CP (for indirect draws) is the
 * first vertex ID, but GLSL expects 0 to be returned.
 */
LLVMValueRef indexed = si_unpack_param(ctx, ctx->vs_state_bits, 1, 1);
indexed = LLVMBuildTrunc(ctx->ac.builder, indexed, ctx->ac.i1, "");
return LLVMBuildSelect(ctx->ac.builder, indexed, ac_get_arg(&ctx->ac, ctx->args.base_vertex),
ctx->ac.i32_0, "");
}
case nir_intrinsic_load_workgroup_size: {
/* Variable workgroup size: 3x10 bits packed into the block_size arg. */
assert(ctx->shader->selector->info.base.workgroup_size_variable &&
ctx->shader->selector->info.uses_variable_block_size);
LLVMValueRef chan[3] = {
si_unpack_param(ctx, ctx->block_size, 0, 10),
si_unpack_param(ctx, ctx->block_size, 10, 10),
si_unpack_param(ctx, ctx->block_size, 20, 10),
};
return ac_build_gather_values(&ctx->ac, chan, 3);
}
/* TES tess levels: si_nir_scan_shader appends TESS_LEVEL_OUTER at input
 * slot info->num_inputs and TESS_LEVEL_INNER at the slot after it, so
 * they can be read through the regular load_tess_varyings path.
 */
case nir_intrinsic_load_tess_level_outer:
return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs, 0, 4, true, false);
case nir_intrinsic_load_tess_level_inner:
return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs + 1, 0, 4, true, false);
case nir_intrinsic_load_tess_level_outer_default:
case nir_intrinsic_load_tess_level_inner_default: {
/* Default tess levels come from the SI_HS_CONST_DEFAULT_TESS_LEVELS
 * internal constant buffer; inner levels follow the 4 outer dwords.
 */
LLVMValueRef slot = LLVMConstInt(ctx->ac.i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
LLVMValueRef buf = ac_get_arg(&ctx->ac, ctx->internal_bindings);
buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot);
int offset = op == nir_intrinsic_load_tess_level_inner_default ? 4 : 0;
LLVMValueRef val[4];
for (int i = 0; i < 4; i++)
val[i] = si_buffer_load_const(ctx, buf, LLVMConstInt(ctx->ac.i32, (offset + i) * 4, 0));
return ac_build_gather_values(&ctx->ac, val, 4);
}
case nir_intrinsic_load_patch_vertices_in:
/* TCS reads it from tcs_out_lds_layout; TES derives it from the TCS
 * output vertex count. Other stages fall through to the caller.
 */
if (ctx->stage == MESA_SHADER_TESS_CTRL)
return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6);
else if (ctx->stage == MESA_SHADER_TESS_EVAL)
return si_get_num_tcs_out_vertices(ctx);
else
return NULL;
case nir_intrinsic_load_sample_mask_in:
return ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.sample_coverage));
default:
return NULL;
}
}
bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader,
struct nir_shader *nir, bool free_nir, bool ngg_cull_shader)
{
@@ -741,6 +792,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
ctx->num_samplers = BITSET_LAST_BIT(info->base.textures_used);
ctx->num_images = info->base.num_images;
ctx->abi.intrinsic_load = si_llvm_load_intrinsic;
si_llvm_init_resource_callbacks(ctx);
si_llvm_create_main_func(ctx, ngg_cull_shader);
@@ -839,8 +892,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
}
case MESA_SHADER_COMPUTE:
ctx->abi.load_local_group_size = si_llvm_get_block_size;
if (nir->info.cs.user_data_components_amd) {
ctx->abi.user_data = ac_get_arg(&ctx->ac, ctx->cs_user_data);
ctx->abi.user_data = ac_build_expand_to_vec4(&ctx->ac, ctx->abi.user_data,

View File

@@ -31,12 +31,6 @@ LLVMValueRef si_get_sample_id(struct si_shader_context *ctx)
return si_unpack_param(ctx, ctx->args.ancillary, 8, 4);
}
/* PS sample mask input: the sample_coverage argument cast to an integer. */
static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   LLVMValueRef coverage = ac_get_arg(&ctx->ac, ctx->args.sample_coverage);

   return ac_to_integer(&ctx->ac, coverage);
}
static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
@@ -987,6 +981,5 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx)
{
ctx->abi.load_sample_position = load_sample_position;
ctx->abi.load_sample_mask_in = load_sample_mask_in;
ctx->abi.emit_fbfetch = si_nir_emit_fbfetch;
}

View File

@@ -135,7 +135,7 @@ static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_conte
return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset);
}
static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
LLVMValueRef si_get_num_tcs_out_vertices(struct si_shader_context *ctx)
{
unsigned tcs_out_vertices =
ctx->shader->selector ? ctx->shader->selector->info.base.tess.tcs_vertices_out
@@ -219,7 +219,7 @@ static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
LLVMValueRef param_stride, constant16;
vertices_per_patch = get_num_tcs_out_vertices(ctx);
vertices_per_patch = si_get_num_tcs_out_vertices(ctx);
num_patches = si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6);
num_patches = LLVMBuildAdd(ctx->ac.builder, num_patches, ctx->ac.i32_1, "");
total_vertices = LLVMBuildMul(ctx->ac.builder, vertices_per_patch, num_patches, "");
@@ -563,79 +563,6 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
}
}
/* Load the tess levels for the current patch from the off-chip tess ring.
 * "semantic" is the patch varying slot (inner or outer tess level).
 */
static LLVMValueRef load_tess_level(struct si_shader_context *ctx, unsigned semantic)
{
   const int param = si_shader_io_get_unique_index_patch(semantic);
   LLVMValueRef base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
   LLVMValueRef addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
                                                  LLVMConstInt(ctx->ac.i32, param, 0));

   return buffer_load(ctx, ctx->ac.f32, ~0, ctx->tess_offchip_ring, base, addr, true);
}
/* Load the driver-provided default tess levels from the
 * SI_HS_CONST_DEFAULT_TESS_LEVELS internal constant buffer.
 * The inner levels follow the 4 outer dwords in the buffer.
 */
static LLVMValueRef load_tess_level_default(struct si_shader_context *ctx, unsigned sysval)
{
   LLVMValueRef desc = ac_get_arg(&ctx->ac, ctx->internal_bindings);
   LLVMValueRef index = LLVMConstInt(ctx->ac.i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
   desc = ac_build_load_to_sgpr(&ctx->ac, desc, index);

   const int base = sysval == SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT ? 4 : 0;
   LLVMValueRef levels[4];

   for (int i = 0; i < 4; i++) {
      LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, (base + i) * 4, 0);
      levels[i] = si_buffer_load_const(ctx, desc, offset);
   }

   return ac_build_gather_values(&ctx->ac, levels, 4);
}
/* ac_shader_abi::load_tess_level callback.
 *
 * Loads either the current patch's tess levels (from the off-chip ring) or,
 * when load_default_state is set, the driver's default tess levels.
 * Only the inner/outer tess-level varying slots are valid.
 */
static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi, unsigned varying_id,
bool load_default_state)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);

   if (varying_id != VARYING_SLOT_TESS_LEVEL_INNER &&
       varying_id != VARYING_SLOT_TESS_LEVEL_OUTER)
      unreachable("unknown tess level");

   if (load_default_state) {
      unsigned sysval = varying_id == VARYING_SLOT_TESS_LEVEL_INNER
                           ? SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT
                           : SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
      return load_tess_level_default(ctx, sysval);
   }

   /* The patch varying slot is the varying id itself. */
   return load_tess_level(ctx, varying_id);
}
/* ac_shader_abi::load_patch_vertices_in callback: number of vertices per
 * input patch for the tessellation stages.
 */
static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);

   switch (ctx->stage) {
   case MESA_SHADER_TESS_CTRL:
      /* TCS: packed into bits [13..18] of tcs_out_lds_layout. */
      return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6);
   case MESA_SHADER_TESS_EVAL:
      /* TES: its input patch size equals the TCS output vertex count. */
      return get_num_tcs_out_vertices(ctx);
   default:
      unreachable("invalid shader stage for VERTICESIN");
   }
}
/**
* Forward all outputs from the vertex shader to the TES. This is only used
* for the fixed function TCS.
@@ -1086,14 +1013,10 @@ void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_par
/* Install the TCS-specific ac_shader_abi callbacks. */
void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx)
{
ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
ctx->abi.load_tess_level = si_load_tess_level;
ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
}
/* Install the TES-specific ac_shader_abi callbacks.
 * Note: ngg_cull_shader is currently unused here.
 */
void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader)
{
ctx->abi.load_tess_varyings = si_nir_load_input_tes;
ctx->abi.load_tess_level = si_load_tess_level;
ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
}

View File

@@ -986,27 +986,7 @@ void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part
si_llvm_build_ret(ctx, ret);
}
/* ac_shader_abi::load_base_vertex callback for VS.
 *
 * Returns the base_vertex argument for indexed draws. When
 * non_indexed_is_zero is set (the GLSL gl_BaseVertex semantic), non-indexed
 * draws must see 0 even though the driver/CP stores the first vertex ID in
 * base_vertex, so a select on the "indexed" state bit is emitted.
 */
static LLVMValueRef get_base_vertex(struct ac_shader_abi *abi, bool non_indexed_is_zero)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);

   /* This doesn't happen with GL: */
   if (!non_indexed_is_zero)
      return ac_get_arg(&ctx->ac, ctx->args.base_vertex);

   /* Bit 1 of vs_state_bits tells us whether this draw is indexed. */
   LLVMValueRef is_indexed = si_unpack_param(ctx, ctx->vs_state_bits, 1, 1);
   is_indexed = LLVMBuildTrunc(ctx->ac.builder, is_indexed, ctx->ac.i1, "");

   return LLVMBuildSelect(ctx->ac.builder, is_indexed,
                          ac_get_arg(&ctx->ac, ctx->args.base_vertex),
                          ctx->ac.i32_0, "");
}
/* Install the VS-specific ac_shader_abi callbacks.
 * Note: ngg_cull_shader is currently unused here.
 */
void si_llvm_init_vs_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader)
{
ctx->abi.load_base_vertex = get_base_vertex;
ctx->abi.load_inputs = si_load_vs_input;
}