radeonsi: replace llvm ls/hs interface lds ops with nir lowered ones
Use the ac_nir lowering passes to generate these LDS load/store ops explicitly.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16418>
This commit is contained in:
@@ -3476,17 +3476,10 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
|
|||||||
|
|
||||||
if (ctx->stage == MESA_SHADER_TESS_CTRL ||
|
if (ctx->stage == MESA_SHADER_TESS_CTRL ||
|
||||||
(ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
|
(ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
|
||||||
bool vertex_index_is_invoc_id =
|
|
||||||
vertex_index_src &&
|
|
||||||
vertex_index_src->ssa->parent_instr->type == nir_instr_type_intrinsic &&
|
|
||||||
nir_instr_as_intrinsic(vertex_index_src->ssa->parent_instr)->intrinsic ==
|
|
||||||
nir_intrinsic_load_invocation_id;
|
|
||||||
|
|
||||||
LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
|
LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
|
||||||
vertex_index, indir_index,
|
vertex_index, indir_index,
|
||||||
base, component,
|
base, component,
|
||||||
count, !is_output,
|
count, !is_output);
|
||||||
vertex_index_is_invoc_id);
|
|
||||||
if (instr->dest.ssa.bit_size == 16) {
|
if (instr->dest.ssa.bit_size == 16) {
|
||||||
result = ac_to_integer(&ctx->ac, result);
|
result = ac_to_integer(&ctx->ac, result);
|
||||||
result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
|
result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
|
||||||
|
@@ -67,8 +67,7 @@ struct ac_shader_abi {
|
|||||||
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
|
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
|
||||||
LLVMValueRef vertex_index, LLVMValueRef param_index,
|
LLVMValueRef vertex_index, LLVMValueRef param_index,
|
||||||
unsigned driver_location, unsigned component,
|
unsigned driver_location, unsigned component,
|
||||||
unsigned num_components,
|
unsigned num_components, bool load_inputs);
|
||||||
bool load_inputs, bool vertex_index_is_invoc_id);
|
|
||||||
|
|
||||||
void (*store_tcs_outputs)(struct ac_shader_abi *abi,
|
void (*store_tcs_outputs)(struct ac_shader_abi *abi,
|
||||||
LLVMValueRef vertex_index, LLVMValueRef param_index,
|
LLVMValueRef vertex_index, LLVMValueRef param_index,
|
||||||
|
@@ -1490,6 +1490,30 @@ static bool si_nir_kill_outputs(nir_shader *nir, const union si_shader_key *key)
|
|||||||
return progress;
|
return progress;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Callback passed to the ac_nir I/O-to-memory lowering passes in
 * si_lower_io_to_mem(): translates a varying semantic into the slot index
 * returned by si_shader_io_get_unique_index(semantic, false).
 */
static unsigned si_map_io_driver_location(unsigned semantic)
|
||||||
|
{
|
||||||
|
return si_shader_io_get_unique_index(semantic, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Run the ac_nir pass that lowers the LS/HS interface I/O of this shader to
 * explicit memory (LDS load/store) ops:
 *   - vertex shader with key->ge.as_ls set: ac_nir_lower_ls_outputs_to_mem,
 *     which additionally receives tcs_vgpr_only_inputs;
 *   - tessellation control shader: ac_nir_lower_hs_inputs_to_mem.
 * Both passes get si_map_io_driver_location as the semantic-to-slot callback
 * and key->ge.opt.same_patch_vertices.
 *
 * Returns true whenever one of the passes was invoked (the caller uses this
 * to decide whether to re-run optimizations and nir_opt_offsets), and false
 * for every other stage/key combination.
 */
static bool si_lower_io_to_mem(const union si_shader_key *key,
|
||||||
|
nir_shader *nir,
|
||||||
|
uint64_t tcs_vgpr_only_inputs)
|
||||||
|
{
|
||||||
|
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||||
|
if (key->ge.as_ls) {
|
||||||
|
NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, si_map_io_driver_location,
|
||||||
|
key->ge.opt.same_patch_vertices, tcs_vgpr_only_inputs);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||||
|
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, si_map_io_driver_location,
|
||||||
|
key->ge.opt.same_patch_vertices);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
|
struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
|
||||||
const union si_shader_key *key,
|
const union si_shader_key *key,
|
||||||
bool *free_nir,
|
bool *free_nir,
|
||||||
@@ -1603,10 +1627,22 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
|
|||||||
* this should be done after that.
|
* this should be done after that.
|
||||||
*/
|
*/
|
||||||
progress2 |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.gfx_level);
|
progress2 |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.gfx_level);
|
||||||
if (progress2)
|
|
||||||
|
bool opt_offsets = si_lower_io_to_mem(key, nir, tcs_vgpr_only_inputs);
|
||||||
|
|
||||||
|
if (progress2 || opt_offsets)
|
||||||
si_nir_opts(sel->screen, nir, false);
|
si_nir_opts(sel->screen, nir, false);
|
||||||
|
|
||||||
if (progress || progress2)
|
if (opt_offsets) {
|
||||||
|
static const nir_opt_offsets_options offset_options = {
|
||||||
|
.uniform_max = 0,
|
||||||
|
.buffer_max = ~0,
|
||||||
|
.shared_max = ~0,
|
||||||
|
};
|
||||||
|
NIR_PASS_V(nir, nir_opt_offsets, &offset_options);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (progress || progress2 || opt_offsets)
|
||||||
si_nir_late_opts(nir);
|
si_nir_late_opts(nir);
|
||||||
|
|
||||||
NIR_PASS_V(nir, nir_divergence_analysis);
|
NIR_PASS_V(nir, nir_divergence_analysis);
|
||||||
|
@@ -39,7 +39,6 @@ struct si_shader_output_values {
|
|||||||
struct si_shader_context {
|
struct si_shader_context {
|
||||||
struct ac_llvm_context ac;
|
struct ac_llvm_context ac;
|
||||||
struct si_shader *shader;
|
struct si_shader *shader;
|
||||||
struct si_shader_selector *next_shader_sel;
|
|
||||||
struct si_screen *screen;
|
struct si_screen *screen;
|
||||||
struct pipe_stream_output_info so;
|
struct pipe_stream_output_info so;
|
||||||
|
|
||||||
|
@@ -743,10 +743,10 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
|
|||||||
}
|
}
|
||||||
|
|
||||||
case nir_intrinsic_load_tess_level_outer:
|
case nir_intrinsic_load_tess_level_outer:
|
||||||
return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs, 0, 4, true, false);
|
return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs, 0, 4, true);
|
||||||
|
|
||||||
case nir_intrinsic_load_tess_level_inner:
|
case nir_intrinsic_load_tess_level_inner:
|
||||||
return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs + 1, 0, 4, true, false);
|
return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs + 1, 0, 4, true);
|
||||||
|
|
||||||
case nir_intrinsic_load_tess_level_outer_default:
|
case nir_intrinsic_load_tess_level_outer_default:
|
||||||
case nir_intrinsic_load_tess_level_inner_default: {
|
case nir_intrinsic_load_tess_level_inner_default: {
|
||||||
@@ -1241,9 +1241,6 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||||||
si_llvm_build_tcs_epilog(&ctx, &tcs_epilog_key);
|
si_llvm_build_tcs_epilog(&ctx, &tcs_epilog_key);
|
||||||
parts[3] = ctx.main_fn;
|
parts[3] = ctx.main_fn;
|
||||||
|
|
||||||
/* VS as LS main part */
|
|
||||||
ctx.next_shader_sel = ctx.shader->selector;
|
|
||||||
|
|
||||||
struct si_shader shader_ls = {};
|
struct si_shader shader_ls = {};
|
||||||
shader_ls.selector = ls;
|
shader_ls.selector = ls;
|
||||||
shader_ls.key.ge.part.vs.prolog = shader->key.ge.part.tcs.ls_prolog;
|
shader_ls.key.ge.part.vs.prolog = shader->key.ge.part.tcs.ls_prolog;
|
||||||
|
@@ -384,44 +384,33 @@ void si_llvm_preload_tes_rings(struct si_shader_context *ctx)
|
|||||||
static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
|
static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
|
||||||
LLVMValueRef vertex_index, LLVMValueRef param_index,
|
LLVMValueRef vertex_index, LLVMValueRef param_index,
|
||||||
unsigned driver_location, unsigned component,
|
unsigned driver_location, unsigned component,
|
||||||
unsigned num_components, bool load_input,
|
unsigned num_components, bool load_input)
|
||||||
bool vertex_index_is_invoc_id)
|
|
||||||
{
|
{
|
||||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||||
struct si_shader_info *info = &ctx->shader->selector->info;
|
struct si_shader_info *info = &ctx->shader->selector->info;
|
||||||
LLVMValueRef dw_addr, stride;
|
LLVMValueRef value[4];
|
||||||
ubyte semantic;
|
|
||||||
|
|
||||||
if (load_input) {
|
if (load_input) {
|
||||||
semantic = info->input[driver_location].semantic;
|
assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index);
|
||||||
} else {
|
|
||||||
semantic = info->output_semantic[driver_location];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Load the TCS input from a VGPR if possible. */
|
ubyte semantic = info->input[driver_location].semantic;
|
||||||
if (ctx->shader->key.ge.opt.same_patch_vertices &&
|
/* Load the TCS input from a VGPR. */
|
||||||
load_input && vertex_index_is_invoc_id && !param_index) {
|
|
||||||
unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +
|
unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +
|
||||||
si_shader_io_get_unique_index(semantic, false) * 4;
|
si_shader_io_get_unique_index(semantic, false) * 4;
|
||||||
LLVMValueRef value[4];
|
|
||||||
|
|
||||||
for (unsigned i = component; i < component + num_components; i++) {
|
for (unsigned i = component; i < component + num_components; i++) {
|
||||||
value[i] = LLVMGetParam(ctx->main_fn, func_param + i);
|
value[i] = LLVMGetParam(ctx->main_fn, func_param + i);
|
||||||
value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
|
value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_patch = vertex_index == NULL;
|
|
||||||
assert((semantic >= VARYING_SLOT_PATCH0 ||
|
|
||||||
semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
|
|
||||||
semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch);
|
|
||||||
|
|
||||||
if (load_input) {
|
|
||||||
stride = si_get_tcs_in_vertex_dw_stride(ctx);
|
|
||||||
dw_addr = get_tcs_in_current_patch_offset(ctx);
|
|
||||||
} else {
|
} else {
|
||||||
|
ubyte semantic = info->output_semantic[driver_location];
|
||||||
|
|
||||||
|
bool is_patch = vertex_index == NULL;
|
||||||
|
assert((semantic >= VARYING_SLOT_PATCH0 ||
|
||||||
|
semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
|
||||||
|
semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch);
|
||||||
|
|
||||||
|
LLVMValueRef dw_addr, stride;
|
||||||
if (is_patch) {
|
if (is_patch) {
|
||||||
stride = NULL;
|
stride = NULL;
|
||||||
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
|
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
|
||||||
@@ -429,23 +418,21 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
|
|||||||
stride = get_tcs_out_vertex_dw_stride(ctx);
|
stride = get_tcs_out_vertex_dw_stride(ctx);
|
||||||
dw_addr = get_tcs_out_current_patch_offset(ctx);
|
dw_addr = get_tcs_out_current_patch_offset(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index,
|
||||||
|
param_index, semantic);
|
||||||
|
|
||||||
|
for (unsigned i = component; i < component + num_components; i++)
|
||||||
|
value[i] = lshs_lds_load(ctx, type, i, dw_addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,
|
|
||||||
semantic);
|
|
||||||
|
|
||||||
LLVMValueRef value[4];
|
|
||||||
for (unsigned i = component; i < component + num_components; i++)
|
|
||||||
value[i] = lshs_lds_load(ctx, type, i, dw_addr);
|
|
||||||
|
|
||||||
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
|
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
|
||||||
}
|
}
|
||||||
|
|
||||||
static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type,
|
static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type,
|
||||||
LLVMValueRef vertex_index, LLVMValueRef param_index,
|
LLVMValueRef vertex_index, LLVMValueRef param_index,
|
||||||
unsigned driver_location, unsigned component,
|
unsigned driver_location, unsigned component,
|
||||||
unsigned num_components,
|
unsigned num_components, bool load_input)
|
||||||
bool load_input, bool vertex_index_is_invoc_id)
|
|
||||||
{
|
{
|
||||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||||
struct si_shader_info *info = &ctx->shader->selector->info;
|
struct si_shader_info *info = &ctx->shader->selector->info;
|
||||||
@@ -877,58 +864,20 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
|
|||||||
{
|
{
|
||||||
struct si_shader *shader = ctx->shader;
|
struct si_shader *shader = ctx->shader;
|
||||||
struct si_shader_info *info = &shader->selector->info;
|
struct si_shader_info *info = &shader->selector->info;
|
||||||
unsigned i, chan;
|
|
||||||
LLVMValueRef vertex_id;
|
|
||||||
if (ctx->screen->info.gfx_level >= GFX11) {
|
|
||||||
vertex_id = ac_build_imad(&ctx->ac, si_unpack_param(ctx, ctx->args.tcs_wave_id, 0, 5),
|
|
||||||
LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, 0),
|
|
||||||
ac_get_thread_id(&ctx->ac));
|
|
||||||
} else {
|
|
||||||
vertex_id = ac_get_arg(&ctx->ac, ctx->args.vs_rel_patch_id);
|
|
||||||
}
|
|
||||||
LLVMValueRef vertex_dw_stride = si_get_tcs_in_vertex_dw_stride(ctx);
|
|
||||||
LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, "");
|
|
||||||
LLVMValueRef *addrs = ctx->abi.outputs;
|
LLVMValueRef *addrs = ctx->abi.outputs;
|
||||||
unsigned ret_offset = 8 + GFX9_TCS_NUM_USER_SGPR + 2;
|
unsigned ret_offset = 8 + GFX9_TCS_NUM_USER_SGPR + 2;
|
||||||
|
|
||||||
/* Write outputs to LDS. The next shader (TCS aka HS) will read
|
if (shader->key.ge.opt.same_patch_vertices) {
|
||||||
* its inputs from it. */
|
for (unsigned i = 0; i < info->num_outputs; i++) {
|
||||||
for (i = 0; i < info->num_outputs; i++) {
|
unsigned semantic = info->output_semantic[i];
|
||||||
unsigned semantic = info->output_semantic[i];
|
int param = si_shader_io_get_unique_index(semantic, false);
|
||||||
|
|
||||||
/* The ARB_shader_viewport_layer_array spec contains the
|
for (unsigned chan = 0; chan < 4; chan++) {
|
||||||
* following issue:
|
if (!(info->output_usagemask[i] & (1 << chan)))
|
||||||
*
|
continue;
|
||||||
* 2) What happens if gl_ViewportIndex or gl_Layer is
|
|
||||||
* written in the vertex shader and a geometry shader is
|
|
||||||
* present?
|
|
||||||
*
|
|
||||||
* RESOLVED: The value written by the last vertex processing
|
|
||||||
* stage is used. If the last vertex processing stage
|
|
||||||
* (vertex, tessellation evaluation or geometry) does not
|
|
||||||
* statically assign to gl_ViewportIndex or gl_Layer, index
|
|
||||||
* or layer zero is assumed.
|
|
||||||
*
|
|
||||||
* So writes to those outputs in VS-as-LS are simply ignored.
|
|
||||||
*/
|
|
||||||
if (semantic == VARYING_SLOT_LAYER || semantic == VARYING_SLOT_VIEWPORT)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
int param = si_shader_io_get_unique_index(semantic, false);
|
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
|
||||||
LLVMValueRef dw_addr =
|
|
||||||
LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");
|
|
||||||
|
|
||||||
for (chan = 0; chan < 4; chan++) {
|
|
||||||
if (!(info->output_usagemask[i] & (1 << chan)))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
|
|
||||||
|
|
||||||
if (!shader->key.ge.opt.same_patch_vertices ||
|
|
||||||
!(ctx->next_shader_sel->info.tcs_vgpr_only_inputs & (1ull << semantic)))
|
|
||||||
lshs_lds_store(ctx, chan, dw_addr, value);
|
|
||||||
|
|
||||||
if (shader->key.ge.opt.same_patch_vertices) {
|
|
||||||
ctx->return_value = LLVMBuildInsertValue(ctx->ac.builder, ctx->return_value,
|
ctx->return_value = LLVMBuildInsertValue(ctx->ac.builder, ctx->return_value,
|
||||||
value, ret_offset + param * 4 + chan, "");
|
value, ret_offset + param * 4 + chan, "");
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user