radeonsi: replace llvm ls/hs interface lds ops with nir lowered ones
Use the ac NIR lowering passes to generate these LDS load/store ops explicitly.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16418>
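For context, the removed LLVM epilog built the LS->HS LDS address by hand as vertex_id * vertex_dw_stride + slot * 4 + component (one vec4 of dwords per output slot, slots ordered by the driver-location mapping); the ac_nir lowering passes now encode an equivalent addressing when emitting the LDS load/store intrinsics. A minimal standalone sketch of that dword addressing, with hypothetical names (compute_lds_dword_offset is illustrative only, not a Mesa function):

    #include <stdint.h>

    /* Illustrative sketch: mirrors the dword address the removed LLVM code
     * computed for LS outputs / HS inputs in LDS. Each output slot is
     * vec4-sized (4 dwords) and vertices are packed with a per-vertex
     * dword stride. */
    uint32_t compute_lds_dword_offset(uint32_t vertex_id,
                                      uint32_t vertex_dw_stride, /* dwords per vertex */
                                      uint32_t slot,             /* mapped driver location */
                                      uint32_t component)        /* 0..3 */
    {
       return vertex_id * vertex_dw_stride + slot * 4 + component;
    }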
@@ -3476,17 +3476,10 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
    if (ctx->stage == MESA_SHADER_TESS_CTRL ||
        (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
-      bool vertex_index_is_invoc_id =
-         vertex_index_src &&
-         vertex_index_src->ssa->parent_instr->type == nir_instr_type_intrinsic &&
-         nir_instr_as_intrinsic(vertex_index_src->ssa->parent_instr)->intrinsic ==
-         nir_intrinsic_load_invocation_id;
-
       LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
                                                          vertex_index, indir_index,
                                                          base, component,
-                                                         count, !is_output,
-                                                         vertex_index_is_invoc_id);
+                                                         count, !is_output);
       if (instr->dest.ssa.bit_size == 16) {
          result = ac_to_integer(&ctx->ac, result);
          result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
@@ -67,8 +67,7 @@ struct ac_shader_abi {
    LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
                                       LLVMValueRef vertex_index, LLVMValueRef param_index,
                                       unsigned driver_location, unsigned component,
-                                      unsigned num_components,
-                                      bool load_inputs, bool vertex_index_is_invoc_id);
+                                      unsigned num_components, bool load_inputs);
 
    void (*store_tcs_outputs)(struct ac_shader_abi *abi,
                              LLVMValueRef vertex_index, LLVMValueRef param_index,
@@ -1490,6 +1490,30 @@ static bool si_nir_kill_outputs(nir_shader *nir, const union si_shader_key *key)
    return progress;
 }
 
+static unsigned si_map_io_driver_location(unsigned semantic)
+{
+   return si_shader_io_get_unique_index(semantic, false);
+}
+
+static bool si_lower_io_to_mem(const union si_shader_key *key,
+                               nir_shader *nir,
+                               uint64_t tcs_vgpr_only_inputs)
+{
+   if (nir->info.stage == MESA_SHADER_VERTEX) {
+      if (key->ge.as_ls) {
+         NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, si_map_io_driver_location,
+                    key->ge.opt.same_patch_vertices, tcs_vgpr_only_inputs);
+         return true;
+      }
+   } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
+      NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, si_map_io_driver_location,
+                 key->ge.opt.same_patch_vertices);
+      return true;
+   }
+
+   return false;
+}
+
 struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
                                      const union si_shader_key *key,
                                      bool *free_nir,
@@ -1603,10 +1627,22 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
     * this should be done after that.
     */
    progress2 |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.gfx_level);
-   if (progress2)
+
+   bool opt_offsets = si_lower_io_to_mem(key, nir, tcs_vgpr_only_inputs);
+
+   if (progress2 || opt_offsets)
       si_nir_opts(sel->screen, nir, false);
 
-   if (progress || progress2)
+   if (opt_offsets) {
+      static const nir_opt_offsets_options offset_options = {
+         .uniform_max = 0,
+         .buffer_max = ~0,
+         .shared_max = ~0,
+      };
+      NIR_PASS_V(nir, nir_opt_offsets, &offset_options);
+   }
+
+   if (progress || progress2 || opt_offsets)
       si_nir_late_opts(nir);
 
    NIR_PASS_V(nir, nir_divergence_analysis);
@@ -39,7 +39,6 @@ struct si_shader_output_values {
 struct si_shader_context {
    struct ac_llvm_context ac;
    struct si_shader *shader;
-   struct si_shader_selector *next_shader_sel;
    struct si_screen *screen;
    struct pipe_stream_output_info so;
 
@@ -743,10 +743,10 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
    }
 
    case nir_intrinsic_load_tess_level_outer:
-      return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs, 0, 4, true, false);
+      return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs, 0, 4, true);
 
    case nir_intrinsic_load_tess_level_inner:
-      return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs + 1, 0, 4, true, false);
+      return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs + 1, 0, 4, true);
 
    case nir_intrinsic_load_tess_level_outer_default:
    case nir_intrinsic_load_tess_level_inner_default: {
@@ -1241,9 +1241,6 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
       si_llvm_build_tcs_epilog(&ctx, &tcs_epilog_key);
       parts[3] = ctx.main_fn;
 
-      /* VS as LS main part */
-      ctx.next_shader_sel = ctx.shader->selector;
-
       struct si_shader shader_ls = {};
       shader_ls.selector = ls;
      shader_ls.key.ge.part.vs.prolog = shader->key.ge.part.tcs.ls_prolog;
@@ -384,44 +384,33 @@ void si_llvm_preload_tes_rings(struct si_shader_context *ctx)
 static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
                                              LLVMValueRef vertex_index, LLVMValueRef param_index,
                                              unsigned driver_location, unsigned component,
-                                             unsigned num_components, bool load_input,
-                                             bool vertex_index_is_invoc_id)
+                                             unsigned num_components, bool load_input)
 {
    struct si_shader_context *ctx = si_shader_context_from_abi(abi);
    struct si_shader_info *info = &ctx->shader->selector->info;
-   LLVMValueRef dw_addr, stride;
-   ubyte semantic;
+   LLVMValueRef value[4];
 
    if (load_input) {
-      semantic = info->input[driver_location].semantic;
-   } else {
-      semantic = info->output_semantic[driver_location];
-   }
+      assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index);
 
-   /* Load the TCS input from a VGPR if possible. */
-   if (ctx->shader->key.ge.opt.same_patch_vertices &&
-       load_input && vertex_index_is_invoc_id && !param_index) {
+      ubyte semantic = info->input[driver_location].semantic;
+      /* Load the TCS input from a VGPR. */
       unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +
                             si_shader_io_get_unique_index(semantic, false) * 4;
-      LLVMValueRef value[4];
 
       for (unsigned i = component; i < component + num_components; i++) {
         value[i] = LLVMGetParam(ctx->main_fn, func_param + i);
         value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
      }
-
-      return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
-   }
+   } else {
+      ubyte semantic = info->output_semantic[driver_location];
 
    bool is_patch = vertex_index == NULL;
    assert((semantic >= VARYING_SLOT_PATCH0 ||
            semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
            semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch);
 
-   if (load_input) {
-      stride = si_get_tcs_in_vertex_dw_stride(ctx);
-      dw_addr = get_tcs_in_current_patch_offset(ctx);
-   } else {
+      LLVMValueRef dw_addr, stride;
       if (is_patch) {
          stride = NULL;
         dw_addr = get_tcs_out_current_patch_data_offset(ctx);
@@ -429,14 +418,13 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
         stride = get_tcs_out_vertex_dw_stride(ctx);
         dw_addr = get_tcs_out_current_patch_offset(ctx);
      }
-   }
 
-   dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,
-                                                 semantic);
+      dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index,
+                                                    param_index, semantic);
 
-   LLVMValueRef value[4];
    for (unsigned i = component; i < component + num_components; i++)
       value[i] = lshs_lds_load(ctx, type, i, dw_addr);
+   }
 
    return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
 }
@@ -444,8 +432,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
 static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type,
                                           LLVMValueRef vertex_index, LLVMValueRef param_index,
                                           unsigned driver_location, unsigned component,
-                                          unsigned num_components,
-                                          bool load_input, bool vertex_index_is_invoc_id)
+                                          unsigned num_components, bool load_input)
 {
    struct si_shader_context *ctx = si_shader_context_from_abi(abi);
    struct si_shader_info *info = &ctx->shader->selector->info;
@@ -877,58 +864,20 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
 {
    struct si_shader *shader = ctx->shader;
    struct si_shader_info *info = &shader->selector->info;
-   unsigned i, chan;
-   LLVMValueRef vertex_id;
-   if (ctx->screen->info.gfx_level >= GFX11) {
-      vertex_id = ac_build_imad(&ctx->ac, si_unpack_param(ctx, ctx->args.tcs_wave_id, 0, 5),
-                                LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, 0),
-                                ac_get_thread_id(&ctx->ac));
-   } else {
-      vertex_id = ac_get_arg(&ctx->ac, ctx->args.vs_rel_patch_id);
-   }
-   LLVMValueRef vertex_dw_stride = si_get_tcs_in_vertex_dw_stride(ctx);
-   LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, "");
    LLVMValueRef *addrs = ctx->abi.outputs;
    unsigned ret_offset = 8 + GFX9_TCS_NUM_USER_SGPR + 2;
 
-   /* Write outputs to LDS. The next shader (TCS aka HS) will read
-    * its inputs from it. */
-   for (i = 0; i < info->num_outputs; i++) {
+   if (shader->key.ge.opt.same_patch_vertices) {
+      for (unsigned i = 0; i < info->num_outputs; i++) {
      unsigned semantic = info->output_semantic[i];
-
-      /* The ARB_shader_viewport_layer_array spec contains the
-       * following issue:
-       *
-       *    2) What happens if gl_ViewportIndex or gl_Layer is
-       *    written in the vertex shader and a geometry shader is
-       *    present?
-       *
-       *    RESOLVED: The value written by the last vertex processing
-       *    stage is used. If the last vertex processing stage
-       *    (vertex, tessellation evaluation or geometry) does not
-       *    statically assign to gl_ViewportIndex or gl_Layer, index
-       *    or layer zero is assumed.
-       *
-       * So writes to those outputs in VS-as-LS are simply ignored.
-       */
-      if (semantic == VARYING_SLOT_LAYER || semantic == VARYING_SLOT_VIEWPORT)
-         continue;
-
      int param = si_shader_io_get_unique_index(semantic, false);
-      LLVMValueRef dw_addr =
-         LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");
 
-      for (chan = 0; chan < 4; chan++) {
+         for (unsigned chan = 0; chan < 4; chan++) {
         if (!(info->output_usagemask[i] & (1 << chan)))
            continue;
 
         LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
 
-         if (!shader->key.ge.opt.same_patch_vertices ||
-             !(ctx->next_shader_sel->info.tcs_vgpr_only_inputs & (1ull << semantic)))
-            lshs_lds_store(ctx, chan, dw_addr, value);
-
-         if (shader->key.ge.opt.same_patch_vertices) {
            ctx->return_value = LLVMBuildInsertValue(ctx->ac.builder, ctx->return_value,
                                                     value, ret_offset + param * 4 + chan, "");
         }