radeonsi: add si_nir_lower_abi pass

This pass is for lower intrinsics to driver spec nir instructions,
so that each compiler backend don't need to implement their own.
Like radv_nir_lower_abi().

Currently only lower intrinsics in si_llvm_load_intrinsic().

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18010>
This commit is contained in:
Qiang Yu
2022-08-10 22:26:49 +08:00
committed by Marge Bot
parent e9f08d8193
commit a5bd39c7ed
5 changed files with 276 additions and 7 deletions

View File

@@ -45,6 +45,7 @@ files_libradeonsi = files(
'si_public.h',
'si_query.c',
'si_query.h',
'si_nir_lower_abi.c',
'si_nir_optim.c',
'si_sdma_copy_image.c',
'si_shader.c',

View File

@@ -0,0 +1,263 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "nir_builder.h"
#include "util/u_prim.h"
#include "ac_nir.h"
#include "si_pipe.h"
#include "si_query.h"
#include "si_state.h"
#include "si_shader_internal.h"
struct lower_abi_state {
struct si_shader *shader;
struct si_shader_args *args;
};
#define GET_FIELD_NIR(field) \
ac_nir_unpack_arg(b, &args->ac, args->vs_state_bits, \
field##__SHIFT, util_bitcount(field##__MASK))
static nir_ssa_def *load_internal_binding(nir_builder *b, struct si_shader_args *args,
unsigned slot)
{
nir_ssa_def *addr = ac_nir_load_arg(b, &args->ac, args->internal_bindings);
return nir_load_smem_amd(b, 4, addr, nir_imm_int(b, slot * 16));
}
static nir_ssa_def *get_num_vert_per_prim(nir_builder *b, struct si_shader *shader,
struct si_shader_args *args)
{
const struct si_shader_info *info = &shader->selector->info;
gl_shader_stage stage = shader->selector->stage;
unsigned num_vertices;
if (stage == MESA_SHADER_GEOMETRY) {
num_vertices = u_vertices_per_prim(info->base.gs.output_primitive);
} else if (stage == MESA_SHADER_VERTEX) {
if (info->base.vs.blit_sgprs_amd)
num_vertices = 3;
else if (shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES)
num_vertices = 2;
else {
/* Extract OUTPRIM field. */
nir_ssa_def *num = GET_FIELD_NIR(GS_STATE_OUTPRIM);
return nir_iadd_imm(b, num, 1);
}
} else {
assert(stage == MESA_SHADER_TESS_EVAL);
if (info->base.tess.point_mode)
num_vertices = 1;
else if (info->base.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
num_vertices = 2;
else
num_vertices = 3;
}
return nir_imm_int(b, num_vertices);
}
static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_state *s)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
struct si_shader *shader = s->shader;
struct si_shader_args *args = s->args;
struct si_shader_selector *sel = shader->selector;
union si_shader_key *key = &shader->key;
gl_shader_stage stage = sel->stage;
b->cursor = nir_before_instr(instr);
nir_ssa_def *replacement = NULL;
switch (intrin->intrinsic) {
case nir_intrinsic_load_first_vertex:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.base_vertex);
break;
case nir_intrinsic_load_base_vertex: {
nir_ssa_def *indexed = GET_FIELD_NIR(VS_STATE_INDEXED);
indexed = nir_i2b(b, indexed);
nir_ssa_def *base_vertex = ac_nir_load_arg(b, &args->ac, args->ac.base_vertex);
replacement = nir_bcsel(b, indexed, base_vertex, nir_imm_int(b, 0));
break;
}
case nir_intrinsic_load_workgroup_size: {
assert(sel->info.base.workgroup_size_variable && sel->info.uses_variable_block_size);
nir_ssa_def *block_size = ac_nir_load_arg(b, &args->ac, args->block_size);
nir_ssa_def *comp[] = {
nir_ubfe_imm(b, block_size, 0, 10),
nir_ubfe_imm(b, block_size, 10, 10),
nir_ubfe_imm(b, block_size, 20, 10),
};
replacement = nir_vec(b, comp, 3);
break;
}
case nir_intrinsic_load_tess_level_outer_default:
case nir_intrinsic_load_tess_level_inner_default: {
nir_ssa_def *buf = load_internal_binding(b, args, SI_HS_CONST_DEFAULT_TESS_LEVELS);
unsigned num_components = intrin->dest.ssa.num_components;
unsigned offset =
intrin->intrinsic == nir_intrinsic_load_tess_level_inner_default ? 16 : 0;
replacement = nir_load_smem_buffer_amd(b, num_components, buf, nir_imm_int(b, offset));
break;
}
case nir_intrinsic_load_patch_vertices_in:
if (stage == MESA_SHADER_TESS_CTRL)
replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_out_lds_layout, 13, 6);
else if (stage == MESA_SHADER_TESS_EVAL) {
nir_ssa_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 6, 5);
replacement = nir_iadd_imm(b, tmp, 1);
} else
unreachable("no nir_load_patch_vertices_in");
break;
case nir_intrinsic_load_sample_mask_in:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.sample_coverage);
break;
case nir_intrinsic_load_lshs_vertex_stride_amd:
if (stage == MESA_SHADER_VERTEX)
replacement = nir_imm_int(b, sel->info.lshs_vertex_stride);
else if (stage == MESA_SHADER_TESS_CTRL)
replacement = sel->screen->info.gfx_level >= GFX9 && shader->is_monolithic ?
nir_imm_int(b, key->ge.part.tcs.ls->info.lshs_vertex_stride) :
nir_ishl_imm(b, GET_FIELD_NIR(VS_STATE_LS_OUT_VERTEX_SIZE), 2);
else
unreachable("no nir_load_lshs_vertex_stride_amd");
break;
case nir_intrinsic_load_tcs_num_patches_amd: {
nir_ssa_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 6);
replacement = nir_iadd_imm(b, tmp, 1);
break;
}
case nir_intrinsic_load_hs_out_patch_data_offset_amd:
replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 11, 21);
break;
case nir_intrinsic_load_ring_tess_offchip_offset_amd:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.tess_offchip_offset);
break;
case nir_intrinsic_load_ring_es2gs_offset_amd:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.es2gs_offset);
break;
case nir_intrinsic_load_clip_half_line_width_amd: {
nir_ssa_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info);
replacement = nir_load_smem_amd(b, 2, addr, nir_imm_int(b, 32));
break;
}
case nir_intrinsic_load_viewport_xy_scale_and_offset: {
bool prim_is_lines = key->ge.opt.ngg_culling & SI_NGG_CULL_LINES;
nir_ssa_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info);
unsigned offset = prim_is_lines ? 16 : 0;
replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, offset));
break;
}
case nir_intrinsic_load_num_vertices_per_primitive_amd:
replacement = get_num_vert_per_prim(b, shader, args);
break;
case nir_intrinsic_load_cull_ccw_amd:
/* radeonsi embed cw/ccw info into front/back face enabled */
replacement = nir_imm_bool(b, false);
break;
case nir_intrinsic_load_cull_any_enabled_amd:
replacement = nir_imm_bool(b, !!key->ge.opt.ngg_culling);
break;
case nir_intrinsic_load_cull_back_face_enabled_amd:
replacement = nir_imm_bool(b, key->ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE);
break;
case nir_intrinsic_load_cull_front_face_enabled_amd:
replacement = nir_imm_bool(b, key->ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE);
break;
case nir_intrinsic_load_cull_small_prim_precision_amd: {
nir_ssa_def *small_prim_precision =
key->ge.opt.ngg_culling & SI_NGG_CULL_LINES ?
GET_FIELD_NIR(GS_STATE_SMALL_PRIM_PRECISION_NO_AA) :
GET_FIELD_NIR(GS_STATE_SMALL_PRIM_PRECISION);
/* Extract the small prim precision. */
small_prim_precision = nir_ior_imm(b, small_prim_precision, 0x70);
replacement = nir_ishl_imm(b, small_prim_precision, 23);
break;
}
case nir_intrinsic_load_cull_small_primitives_enabled_amd: {
unsigned mask = SI_NGG_CULL_LINES | SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT;
replacement = nir_imm_bool(b, (key->ge.opt.ngg_culling & mask) != SI_NGG_CULL_LINES);
break;
}
case nir_intrinsic_load_provoking_vtx_in_prim_amd:
replacement = GET_FIELD_NIR(GS_STATE_PROVOKING_VTX_INDEX);
break;
case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_PIPELINE_STATS_EMU));
break;
case nir_intrinsic_load_prim_gen_query_enabled_amd:
case nir_intrinsic_load_prim_xfb_query_enabled_amd:
replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_STREAMOUT_QUERY_ENABLED));
break;
case nir_intrinsic_load_clamp_vertex_color_amd:
replacement = nir_i2b(b, GET_FIELD_NIR(VS_STATE_CLAMP_VERTEX_COLOR));
break;
default:
return false;
}
if (replacement)
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
nir_instr_remove(instr);
nir_instr_free(instr);
return true;
}
bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shader_args *args)
{
struct lower_abi_state state = {
.shader = shader,
.args = args,
};
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
nir_builder b;
nir_builder_init(&b, impl);
bool progress = false;
nir_foreach_block_safe(block, impl) {
nir_foreach_instr_safe(instr, block) {
progress |= lower_abi_instr(&b, instr, &state);
}
}
nir_metadata preserved = progress ?
nir_metadata_dominance | nir_metadata_block_index :
nir_metadata_all;
nir_metadata_preserve(impl, preserved);
return progress;
}

View File

@@ -1789,8 +1789,8 @@ static void si_assign_param_offsets(nir_shader *nir, struct si_shader *shader)
si_nir_assign_param_offsets(nir, shader, slot_remap);
}
struct nir_shader *si_get_nir_shader(struct si_shader *shader, bool *free_nir,
uint64_t tcs_vgpr_only_inputs)
struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_args *args,
bool *free_nir, uint64_t tcs_vgpr_only_inputs)
{
struct si_shader_selector *sel = shader->selector;
const union si_shader_key *key = &shader->key;
@@ -1928,6 +1928,8 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, bool *free_nir,
opt_offsets = true;
}
NIR_PASS(progress2, nir, si_nir_lower_abi, shader, args);
if (progress2 || opt_offsets)
si_nir_opts(sel->screen, nir, false);
@@ -1976,7 +1978,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
si_init_shader_args(shader, &args);
bool free_nir;
struct nir_shader *nir = si_get_nir_shader(shader, &free_nir, 0);
struct nir_shader *nir = si_get_nir_shader(shader, &args, &free_nir, 0);
struct pipe_stream_output_info so = {};
/* NGG streamout has been lowered to buffer store in nir. */

View File

@@ -169,8 +169,8 @@ bool si_vs_needs_prolog(const struct si_shader_selector *sel,
void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_sgprs,
const struct si_vs_prolog_bits *prolog_key,
struct si_shader *shader_out, union si_shader_part_key *key);
struct nir_shader *si_get_nir_shader(struct si_shader *shader, bool *free_nir,
uint64_t tcs_vgpr_only_inputs);
struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_args *args,
bool *free_nir, uint64_t tcs_vgpr_only_inputs);
void si_get_tcs_epilog_key(struct si_shader *shader, union si_shader_part_key *key);
bool si_need_ps_prolog(const union si_shader_part_key *key);
void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *key,
@@ -189,6 +189,9 @@ void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx);
unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader);
bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
/* si_nir_lower_abi.c */
bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shader_args *args);
/* si_shader_llvm.c */
bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
struct ac_shader_config *conf, struct ac_llvm_compiler *compiler,

View File

@@ -1350,7 +1350,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
shader_ls.is_monolithic = true;
si_init_shader_args(&shader_ls, ctx.args);
nir = si_get_nir_shader(&shader_ls, &free_nir, sel->info.tcs_vgpr_only_inputs);
nir = si_get_nir_shader(&shader_ls, ctx.args, &free_nir, sel->info.tcs_vgpr_only_inputs);
si_update_shader_binary_info(shader, nir);
if (!si_llvm_translate_nir(&ctx, &shader_ls, nir, free_nir)) {
@@ -1422,7 +1422,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
shader_es.is_monolithic = true;
si_init_shader_args(&shader_es, ctx.args);
nir = si_get_nir_shader(&shader_es, &free_nir, 0);
nir = si_get_nir_shader(&shader_es, ctx.args, &free_nir, 0);
si_update_shader_binary_info(shader, nir);
if (!si_llvm_translate_nir(&ctx, &shader_es, nir, free_nir)) {