From a5bd39c7eddcd284d545b3e537702b8400c98f6b Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Wed, 10 Aug 2022 22:26:49 +0800 Subject: [PATCH] radeonsi: add si_nir_lower_abi pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This pass is for lower intrinsics to driver spec nir instructions, so that each compiler backend don't need to implement their own. Like radv_nir_lower_abi(). Currently only lower intrinsics in si_llvm_load_intrinsic(). Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Part-of: --- src/gallium/drivers/radeonsi/meson.build | 1 + .../drivers/radeonsi/si_nir_lower_abi.c | 263 ++++++++++++++++++ src/gallium/drivers/radeonsi/si_shader.c | 8 +- .../drivers/radeonsi/si_shader_internal.h | 7 +- src/gallium/drivers/radeonsi/si_shader_llvm.c | 4 +- 5 files changed, 276 insertions(+), 7 deletions(-) create mode 100644 src/gallium/drivers/radeonsi/si_nir_lower_abi.c diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build index 12ceae6e9b7..18099fa28fe 100644 --- a/src/gallium/drivers/radeonsi/meson.build +++ b/src/gallium/drivers/radeonsi/meson.build @@ -45,6 +45,7 @@ files_libradeonsi = files( 'si_public.h', 'si_query.c', 'si_query.h', + 'si_nir_lower_abi.c', 'si_nir_optim.c', 'si_sdma_copy_image.c', 'si_shader.c', diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c new file mode 100644 index 00000000000..0668f0cb2d9 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -0,0 +1,263 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nir_builder.h" +#include "util/u_prim.h" + +#include "ac_nir.h" +#include "si_pipe.h" +#include "si_query.h" +#include "si_state.h" +#include "si_shader_internal.h" + +struct lower_abi_state { + struct si_shader *shader; + struct si_shader_args *args; +}; + +#define GET_FIELD_NIR(field) \ + ac_nir_unpack_arg(b, &args->ac, args->vs_state_bits, \ + field##__SHIFT, util_bitcount(field##__MASK)) + +static nir_ssa_def *load_internal_binding(nir_builder *b, struct si_shader_args *args, + unsigned slot) +{ + nir_ssa_def *addr = ac_nir_load_arg(b, &args->ac, args->internal_bindings); + return nir_load_smem_amd(b, 4, addr, nir_imm_int(b, slot * 16)); +} + +static nir_ssa_def *get_num_vert_per_prim(nir_builder *b, struct si_shader *shader, + struct si_shader_args *args) +{ + const struct si_shader_info *info = &shader->selector->info; + gl_shader_stage stage = shader->selector->stage; + + unsigned num_vertices; + if (stage == MESA_SHADER_GEOMETRY) { + num_vertices = u_vertices_per_prim(info->base.gs.output_primitive); + } else if (stage == MESA_SHADER_VERTEX) { + if (info->base.vs.blit_sgprs_amd) + num_vertices = 3; + else if (shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES) + num_vertices = 2; + else { + /* Extract OUTPRIM field. */ + nir_ssa_def *num = GET_FIELD_NIR(GS_STATE_OUTPRIM); + return nir_iadd_imm(b, num, 1); + } + } else { + assert(stage == MESA_SHADER_TESS_EVAL); + + if (info->base.tess.point_mode) + num_vertices = 1; + else if (info->base.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) + num_vertices = 2; + else + num_vertices = 3; + } + return nir_imm_int(b, num_vertices); +} + +static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_state *s) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + struct si_shader *shader = s->shader; + struct si_shader_args *args = s->args; + struct si_shader_selector *sel = shader->selector; + union si_shader_key *key = &shader->key; + gl_shader_stage stage = sel->stage; + + b->cursor = nir_before_instr(instr); + + nir_ssa_def *replacement = NULL; + + switch (intrin->intrinsic) { + case nir_intrinsic_load_first_vertex: + replacement = ac_nir_load_arg(b, &args->ac, args->ac.base_vertex); + break; + case nir_intrinsic_load_base_vertex: { + nir_ssa_def *indexed = GET_FIELD_NIR(VS_STATE_INDEXED); + indexed = nir_i2b(b, indexed); + + nir_ssa_def *base_vertex = ac_nir_load_arg(b, &args->ac, args->ac.base_vertex); + replacement = nir_bcsel(b, indexed, base_vertex, nir_imm_int(b, 0)); + break; + } + case nir_intrinsic_load_workgroup_size: { + assert(sel->info.base.workgroup_size_variable && sel->info.uses_variable_block_size); + + nir_ssa_def *block_size = ac_nir_load_arg(b, &args->ac, args->block_size); + nir_ssa_def *comp[] = { + nir_ubfe_imm(b, block_size, 0, 10), + nir_ubfe_imm(b, block_size, 10, 10), + nir_ubfe_imm(b, block_size, 20, 10), + }; + replacement = nir_vec(b, comp, 3); + break; + } + case nir_intrinsic_load_tess_level_outer_default: + case nir_intrinsic_load_tess_level_inner_default: { + nir_ssa_def *buf = load_internal_binding(b, args, SI_HS_CONST_DEFAULT_TESS_LEVELS); + unsigned num_components = intrin->dest.ssa.num_components; + unsigned offset = + intrin->intrinsic == nir_intrinsic_load_tess_level_inner_default ? 16 : 0; + replacement = nir_load_smem_buffer_amd(b, num_components, buf, nir_imm_int(b, offset)); + break; + } + case nir_intrinsic_load_patch_vertices_in: + if (stage == MESA_SHADER_TESS_CTRL) + replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_out_lds_layout, 13, 6); + else if (stage == MESA_SHADER_TESS_EVAL) { + nir_ssa_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 6, 5); + replacement = nir_iadd_imm(b, tmp, 1); + } else + unreachable("no nir_load_patch_vertices_in"); + break; + case nir_intrinsic_load_sample_mask_in: + replacement = ac_nir_load_arg(b, &args->ac, args->ac.sample_coverage); + break; + case nir_intrinsic_load_lshs_vertex_stride_amd: + if (stage == MESA_SHADER_VERTEX) + replacement = nir_imm_int(b, sel->info.lshs_vertex_stride); + else if (stage == MESA_SHADER_TESS_CTRL) + replacement = sel->screen->info.gfx_level >= GFX9 && shader->is_monolithic ? + nir_imm_int(b, key->ge.part.tcs.ls->info.lshs_vertex_stride) : + nir_ishl_imm(b, GET_FIELD_NIR(VS_STATE_LS_OUT_VERTEX_SIZE), 2); + else + unreachable("no nir_load_lshs_vertex_stride_amd"); + break; + case nir_intrinsic_load_tcs_num_patches_amd: { + nir_ssa_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 6); + replacement = nir_iadd_imm(b, tmp, 1); + break; + } + case nir_intrinsic_load_hs_out_patch_data_offset_amd: + replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 11, 21); + break; + case nir_intrinsic_load_ring_tess_offchip_offset_amd: + replacement = ac_nir_load_arg(b, &args->ac, args->ac.tess_offchip_offset); + break; + case nir_intrinsic_load_ring_es2gs_offset_amd: + replacement = ac_nir_load_arg(b, &args->ac, args->ac.es2gs_offset); + break; + case nir_intrinsic_load_clip_half_line_width_amd: { + nir_ssa_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info); + replacement = nir_load_smem_amd(b, 2, addr, nir_imm_int(b, 32)); + break; + } + case nir_intrinsic_load_viewport_xy_scale_and_offset: { + bool prim_is_lines = key->ge.opt.ngg_culling & SI_NGG_CULL_LINES; + nir_ssa_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info); + unsigned offset = prim_is_lines ? 16 : 0; + replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, offset)); + break; + } + case nir_intrinsic_load_num_vertices_per_primitive_amd: + replacement = get_num_vert_per_prim(b, shader, args); + break; + case nir_intrinsic_load_cull_ccw_amd: + /* radeonsi embed cw/ccw info into front/back face enabled */ + replacement = nir_imm_bool(b, false); + break; + case nir_intrinsic_load_cull_any_enabled_amd: + replacement = nir_imm_bool(b, !!key->ge.opt.ngg_culling); + break; + case nir_intrinsic_load_cull_back_face_enabled_amd: + replacement = nir_imm_bool(b, key->ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE); + break; + case nir_intrinsic_load_cull_front_face_enabled_amd: + replacement = nir_imm_bool(b, key->ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE); + break; + case nir_intrinsic_load_cull_small_prim_precision_amd: { + nir_ssa_def *small_prim_precision = + key->ge.opt.ngg_culling & SI_NGG_CULL_LINES ? + GET_FIELD_NIR(GS_STATE_SMALL_PRIM_PRECISION_NO_AA) : + GET_FIELD_NIR(GS_STATE_SMALL_PRIM_PRECISION); + + /* Extract the small prim precision. */ + small_prim_precision = nir_ior_imm(b, small_prim_precision, 0x70); + replacement = nir_ishl_imm(b, small_prim_precision, 23); + break; + } + case nir_intrinsic_load_cull_small_primitives_enabled_amd: { + unsigned mask = SI_NGG_CULL_LINES | SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT; + replacement = nir_imm_bool(b, (key->ge.opt.ngg_culling & mask) != SI_NGG_CULL_LINES); + break; + } + case nir_intrinsic_load_provoking_vtx_in_prim_amd: + replacement = GET_FIELD_NIR(GS_STATE_PROVOKING_VTX_INDEX); + break; + case nir_intrinsic_load_pipeline_stat_query_enabled_amd: + replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_PIPELINE_STATS_EMU)); + break; + case nir_intrinsic_load_prim_gen_query_enabled_amd: + case nir_intrinsic_load_prim_xfb_query_enabled_amd: + replacement = nir_i2b(b, GET_FIELD_NIR(GS_STATE_STREAMOUT_QUERY_ENABLED)); + break; + case nir_intrinsic_load_clamp_vertex_color_amd: + replacement = nir_i2b(b, GET_FIELD_NIR(VS_STATE_CLAMP_VERTEX_COLOR)); + break; + default: + return false; + } + + if (replacement) + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement); + + nir_instr_remove(instr); + nir_instr_free(instr); + + return true; +} + +bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shader_args *args) +{ + struct lower_abi_state state = { + .shader = shader, + .args = args, + }; + + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + + nir_builder b; + nir_builder_init(&b, impl); + + bool progress = false; + nir_foreach_block_safe(block, impl) { + nir_foreach_instr_safe(instr, block) { + progress |= lower_abi_instr(&b, instr, &state); + } + } + + nir_metadata preserved = progress ? + nir_metadata_dominance | nir_metadata_block_index : + nir_metadata_all; + nir_metadata_preserve(impl, preserved); + + return progress; +} diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5e8f1f29337..e21ab6700f7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1789,8 +1789,8 @@ static void si_assign_param_offsets(nir_shader *nir, struct si_shader *shader) si_nir_assign_param_offsets(nir, shader, slot_remap); } -struct nir_shader *si_get_nir_shader(struct si_shader *shader, bool *free_nir, - uint64_t tcs_vgpr_only_inputs) +struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_args *args, + bool *free_nir, uint64_t tcs_vgpr_only_inputs) { struct si_shader_selector *sel = shader->selector; const union si_shader_key *key = &shader->key; @@ -1928,6 +1928,8 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, bool *free_nir, opt_offsets = true; } + NIR_PASS(progress2, nir, si_nir_lower_abi, shader, args); + if (progress2 || opt_offsets) si_nir_opts(sel->screen, nir, false); @@ -1976,7 +1978,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi si_init_shader_args(shader, &args); bool free_nir; - struct nir_shader *nir = si_get_nir_shader(shader, &free_nir, 0); + struct nir_shader *nir = si_get_nir_shader(shader, &args, &free_nir, 0); struct pipe_stream_output_info so = {}; /* NGG streamout has been lowered to buffer store in nir. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 83472047bac..4df29868f74 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -169,8 +169,8 @@ bool si_vs_needs_prolog(const struct si_shader_selector *sel, void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_sgprs, const struct si_vs_prolog_bits *prolog_key, struct si_shader *shader_out, union si_shader_part_key *key); -struct nir_shader *si_get_nir_shader(struct si_shader *shader, bool *free_nir, - uint64_t tcs_vgpr_only_inputs); +struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_args *args, + bool *free_nir, uint64_t tcs_vgpr_only_inputs); void si_get_tcs_epilog_key(struct si_shader *shader, union si_shader_part_key *key); bool si_need_ps_prolog(const union si_shader_part_key *key); void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *key, @@ -189,6 +189,9 @@ void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx); unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader); bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader); +/* si_nir_lower_abi.c */ +bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shader_args *args); + /* si_shader_llvm.c */ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary, struct ac_shader_config *conf, struct ac_llvm_compiler *compiler, diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index a7316ad5428..0258c430ad2 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -1350,7 +1350,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * shader_ls.is_monolithic = true; si_init_shader_args(&shader_ls, ctx.args); - nir = si_get_nir_shader(&shader_ls, &free_nir, sel->info.tcs_vgpr_only_inputs); + nir = si_get_nir_shader(&shader_ls, ctx.args, &free_nir, sel->info.tcs_vgpr_only_inputs); si_update_shader_binary_info(shader, nir); if (!si_llvm_translate_nir(&ctx, &shader_ls, nir, free_nir)) { @@ -1422,7 +1422,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * shader_es.is_monolithic = true; si_init_shader_args(&shader_es, ctx.args); - nir = si_get_nir_shader(&shader_es, &free_nir, 0); + nir = si_get_nir_shader(&shader_es, ctx.args, &free_nir, 0); si_update_shader_binary_info(shader, nir); if (!si_llvm_translate_nir(&ctx, &shader_es, nir, free_nir)) {