Files
third_party_mesa3d/src/amd/common/nir/ac_nir.h

387 lines
13 KiB
C

/*
* Copyright © 2021 Valve Corporation
*
* SPDX-License-Identifier: MIT
*/
#ifndef AC_NIR_H
#define AC_NIR_H
#include "ac_hw_stage.h"
#include "ac_shader_args.h"
#include "ac_shader_util.h"
#include "nir.h"
#ifdef __cplusplus
extern "C" {
#endif
/* AC_EXP_PARAM_* constants. The first group covers the 5-bit range 0..31 and
 * the second group starts at 64, so the two groups never overlap.
 */
enum {
   /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
   AC_EXP_PARAM_OFFSET_0 = 0,
   AC_EXP_PARAM_OFFSET_31 = 31,

   /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
   AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
   AC_EXP_PARAM_DEFAULT_VAL_0001 = 65,
   AC_EXP_PARAM_DEFAULT_VAL_1110 = 66,
   AC_EXP_PARAM_DEFAULT_VAL_1111 = 67,

   /* Deprecated, use AC_EXP_PARAM_DEFAULT_VAL_0000 instead. */
   AC_EXP_PARAM_UNDEFINED = 255,
};
/* AC_EXP_FLAG_* bit flags. Each constant occupies its own bit, so the flags
 * can be combined with bitwise OR.
 */
enum {
   AC_EXP_FLAG_COMPRESSED = 0x1,
   AC_EXP_FLAG_DONE = 0x2,
   AC_EXP_FLAG_VALID_MASK = 0x4,
};
/* Small configuration record: the target GFX level plus a flag named uses_aco.
 * NOTE(review): no user of this struct is visible in this header; uses_aco
 * presumably selects the ACO backend over LLVM - confirm against callers.
 */
struct ac_nir_config {
enum amd_gfx_level gfx_level;
bool uses_aco;
};
/* TODO: Remove these once radeonsi gathers shader_info before lowering. */

/* Flag names. They are masked to 4 bits when packed by AC_VECTOR_ARG_FLAG. */
#define AC_VECTOR_ARG_UNSET       0
#define AC_VECTOR_ARG_INTERP_MODE 1
#define AC_VECTOR_ARG_IS_COLOR    2

/* Packs a flag word: `name` goes into bits [3:0], `value` into bits 4 and up. */
#define AC_VECTOR_ARG_FLAG(name, value) (((value) << 4) | ((name) & 0xf))

/* Unpack the two halves from the flags stored on intrinsic `intr`. */
#define AC_VECTOR_ARG_FLAG_GET_NAME(intr)  (nir_intrinsic_flags(intr) & 0xf)
#define AC_VECTOR_ARG_FLAG_GET_VALUE(intr) (nir_intrinsic_flags(intr) >> 4)
/* Maps I/O semantics to the actual location used by the lowering pass.
 *
 * Callback type: receives one unsigned `semantic` and returns an unsigned
 * location. It is the type of the `map` parameter taken by the *_to_mem
 * lowering entry points declared in this header.
 */
typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic);
/* Forward declaration of nir_builder so we don't have to include nir_builder.h here */
struct nir_builder;
typedef struct nir_builder nir_builder;
/* nir_xfb_info is forward-declared the same way; the declarations visible in
 * this header only use pointers to it.
 */
struct nir_xfb_info;
typedef struct nir_xfb_info nir_xfb_info;
/* Executed by ac_nir_cull when the current primitive is accepted.
 *
 * Callback type: receives the builder and an opaque `state` pointer and
 * returns nothing.
 */
typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state);
/* Takes the device info, a use_llvm flag and a non-const pointer to
 * nir_shader_compiler_options.
 * NOTE(review): presumably fills *options for the given device/backend -
 * confirm in the definition.
 */
void
ac_nir_set_options(struct radeon_info *info, bool use_llvm,
nir_shader_compiler_options *options);
/* Argument-loading helpers. Each one takes the builder, the shader argument
 * table and the ac_arg to read, and returns a nir_def pointer.
 * NOTE(review): the exact semantics live in the implementation; the notes on
 * the extra parameters below are inferred from their names.
 */

/* Variant with an extra relative_index (presumably an offset relative to
 * `arg` - confirm).
 */
nir_def *
ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
struct ac_arg arg, unsigned relative_index);
/* Plain variant with no extra parameters. */
nir_def *
ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg);
/* Variant with an extra upper_bound (presumably a known maximum for the
 * loaded value - confirm).
 */
nir_def *
ac_nir_load_arg_upper_bound(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
unsigned upper_bound);
/* Takes the same builder/argument-table/ac_arg triple as the load helpers,
 * plus a nir_def `val`; returns nothing.
 * NOTE(review): presumably writes `val` to the shader argument `arg` - confirm.
 */
void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
nir_def *val);
/* Returns a nir_def derived from shader argument `arg`, parameterized by
 * `rshift` and `bitwidth`.
 * NOTE(review): presumably extracts a `bitwidth`-bit field starting at bit
 * `rshift` of the argument - confirm in the definition.
 */
nir_def *
ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
unsigned rshift, unsigned bitwidth);
/* Shader pass entry point operating on `shader`.
 * NOTE(review): the bool result presumably reports whether the shader was
 * changed - confirm.
 */
bool ac_nir_lower_sin_cos(nir_shader *shader);
/* Shader pass entry point. Besides the shader it takes the GFX level, a
 * has_ls_vgpr_init_bug flag, the hardware stage, the wave and workgroup
 * sizes, and the (read-only) shader argument table.
 * NOTE(review): judging by the name it rewrites intrinsics into shader
 * argument loads, and the bool result presumably reports progress - confirm.
 */
bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx_level,
bool has_ls_vgpr_init_bug, const enum ac_hw_stage hw_stage,
unsigned wave_size, unsigned workgroup_size,
const struct ac_shader_args *ac_args);
/* Returns a nir_xfb_info pointer for the given (read-only) shader.
 * NOTE(review): the sort order and the ownership of the returned object are
 * not visible in this header - check the definition before freeing or caching.
 */
nir_xfb_info *ac_nir_get_sorted_xfb_info(const nir_shader *nir);
/* Shader pass entry point taking a sprite_tex_disallowed flag and two
 * per-slot tables.
 *
 * The array bounds in the parameter list are documentation only (array
 * parameters are pointers in C); they indicate that both tables are expected
 * to hold NUM_TOTAL_VARYING_SLOTS entries.
 * NOTE(review): both tables are non-const, so they are presumably written by
 * the pass - confirm.
 */
bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed,
int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS],
uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]);
/* LS-output and HS-input lowering entry points. Both take the same parameter
 * list after the shader: the I/O location mapping callback, the GFX level, a
 * tcs_in_out_eq flag and two 64-bit masks (tcs_inputs_via_temp and
 * tcs_inputs_via_lds).
 * NOTE(review): the masks are presumably indexed by I/O location and the two
 * calls are presumably meant to receive matching values - confirm against
 * callers.
 */
void
ac_nir_lower_ls_outputs_to_mem(nir_shader *ls,
ac_nir_map_io_driver_location map,
enum amd_gfx_level gfx_level,
bool tcs_in_out_eq,
uint64_t tcs_inputs_via_temp,
uint64_t tcs_inputs_via_lds);
void
ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
ac_nir_map_io_driver_location map,
enum amd_gfx_level gfx_level,
bool tcs_in_out_eq,
uint64_t tcs_inputs_via_temp,
uint64_t tcs_inputs_via_lds);
/* HS-output lowering entry point. Takes read-only TCS info, the I/O location
 * mapping callback, the GFX level, a 64-bit mask tes_inputs_read, a 32-bit
 * mask tes_patch_inputs_read, and the wave size.
 * NOTE(review): the masks presumably describe which outputs the TES consumes -
 * confirm.
 */
void
ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, const nir_tcs_info *info,
ac_nir_map_io_driver_location map,
enum amd_gfx_level gfx_level,
uint64_t tes_inputs_read,
uint32_t tes_patch_inputs_read,
unsigned wave_size);
/* TES-input lowering entry point; takes only the shader and the I/O location
 * mapping callback.
 */
void
ac_nir_lower_tes_inputs_to_mem(nir_shader *shader,
ac_nir_map_io_driver_location map);
/* Takes read-only device info and TCS shader_info plus a set of scalar
 * tessellation parameters, and two non-const unsigned pointers
 * (num_patches_per_wg, hw_lds_size).
 * NOTE(review): the two pointers are presumably outputs that receive the
 * computed patch count per workgroup and the LDS size - confirm, including
 * the unit of hw_lds_size.
 */
void
ac_nir_compute_tess_wg_info(const struct radeon_info *info, const struct shader_info *tcs_info,
unsigned wave_size, bool tess_uses_primid, bool all_invocations_define_tess_levels,
unsigned num_tcs_input_cp, unsigned lds_input_vertex_size,
unsigned num_mem_tcs_outputs, unsigned num_mem_tcs_patch_outputs,
unsigned *num_patches_per_wg, unsigned *hw_lds_size);
/* ES-output lowering entry point. Takes the I/O location mapping callback,
 * the GFX level, esgs_itemsize and a 64-bit mask gs_inputs_read.
 * NOTE(review): the unit of esgs_itemsize is not visible here - confirm.
 */
void
ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
ac_nir_map_io_driver_location map,
enum amd_gfx_level gfx_level,
unsigned esgs_itemsize,
uint64_t gs_inputs_read);
/* GS-input lowering entry point. Takes the I/O location mapping callback, the
 * GFX level and a triangle_strip_adjacency_fix flag.
 */
void
ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
ac_nir_map_io_driver_location map,
enum amd_gfx_level gfx_level,
bool triangle_strip_adjacency_fix);
/* Shader pass entry point parameterized by the GFX level.
 * NOTE(review): the bool result presumably reports whether the shader was
 * changed - confirm.
 */
bool
ac_nir_lower_indirect_derefs(nir_shader *shader,
enum amd_gfx_level gfx_level);
/* Options passed (by const pointer) to ac_nir_lower_ngg_nogs() and
 * ac_nir_lower_ngg_gs(). The leading fields carry no stage marker; the
 * trailing fields are grouped under the VS and GS markers below.
 */
typedef struct {
enum radeon_family family;
enum amd_gfx_level gfx_level;
unsigned max_workgroup_size;
unsigned wave_size;
uint8_t clip_cull_dist_mask;
const uint8_t *vs_output_param_offset; /* GFX11+ */
bool has_param_exports;
bool can_cull;
bool disable_streamout;
bool has_gen_prim_query;
bool has_xfb_prim_query;
bool use_gfx12_xfb_intrinsic;
bool has_gs_invocations_query;
bool has_gs_primitives_query;
bool kill_pointsize;
bool kill_layer;
bool force_vrs;
bool compact_primitives;
/* VS */
unsigned num_vertices_per_primitive;
bool early_prim_export;
bool passthrough;
bool use_edgeflags;
bool export_primitive_id;
bool export_primitive_id_per_prim;
uint32_t instance_rate_inputs;
uint32_t user_clip_plane_enable_mask;
/* GS */
unsigned gs_out_vtx_bytes;
} ac_nir_lower_ngg_options;
/* NGG lowering entry points. Both take the shader and a read-only
 * ac_nir_lower_ngg_options.
 * NOTE(review): judging by the names, _nogs is for shaders without a geometry
 * stage and _gs for geometry shaders - confirm.
 */
void
ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
void
ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
/* NGG mesh-shader lowering entry point. Unlike the nogs/gs variants it takes
 * its settings as individual parameters instead of an options struct.
 *
 * out_needs_scratch_ring is the only non-const pointer besides the shader.
 * NOTE(review): it is presumably an output flag set by the pass - confirm
 * whether it is always written.
 */
void
ac_nir_lower_ngg_mesh(nir_shader *shader,
enum amd_gfx_level gfx_level,
uint32_t clipdist_enable_mask,
const uint8_t *vs_output_param_offset,
bool has_param_exports,
bool *out_needs_scratch_ring,
unsigned wave_size,
unsigned workgroup_size,
bool multiview,
bool has_query,
bool fast_launch_2);
/* Task-output and mesh-input lowering entry points. Both take
 * task_payload_entry_bytes and task_num_entries; the task variant also takes
 * a has_query flag.
 * NOTE(review): the two passes presumably need matching payload parameters -
 * confirm against callers.
 */
void
ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
unsigned task_payload_entry_bytes,
unsigned task_num_entries,
bool has_query);
void
ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader,
unsigned task_payload_entry_bytes,
unsigned task_num_entries);
/* Three shader pass entry points returning bool. Only ac_nir_lower_resinfo
 * takes an extra parameter (the GFX level).
 * NOTE(review): the bool results presumably report whether the shader was
 * changed - confirm.
 */
bool
ac_nir_lower_global_access(nir_shader *shader);
bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level);
bool ac_nir_lower_image_opcodes(nir_shader *nir);
/* Per-output GS information, passed by pointer to
 * ac_nir_create_gs_copy_shader() and ac_nir_lower_legacy_gs().
 *
 * Every member is a pointer to caller-provided storage; the struct holds no
 * inline arrays.
 * NOTE(review): the required array lengths and the indexing scheme
 * (presumably per output slot) are not visible here - confirm.
 */
typedef struct ac_nir_gs_output_info {
const uint8_t *streams;
const uint8_t *streams_16bit_lo;
const uint8_t *streams_16bit_hi;
const uint8_t *varying_mask;
const uint8_t *varying_mask_16bit_lo;
const uint8_t *varying_mask_16bit_hi;
const uint8_t *sysval_mask;
/* type for each 16bit slot component */
/* Each is a pointer to rows of 4 nir_alu_type entries (one row per slot). */
nir_alu_type (*types_16bit_lo)[4];
nir_alu_type (*types_16bit_hi)[4];
} ac_nir_gs_output_info;
/* Returns a nir_shader pointer built from the read-only GS `gs_nir` and the
 * given export settings and output info.
 * NOTE(review): ownership of the returned shader is not visible in this
 * header - confirm who frees it.
 */
nir_shader *
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
enum amd_gfx_level gfx_level,
uint32_t clip_cull_mask,
const uint8_t *param_offsets,
bool has_param_exports,
bool disable_streamout,
bool kill_pointsize,
bool kill_layer,
bool force_vrs,
ac_nir_gs_output_info *output_info);
/* Legacy VS lowering entry point. Its parameter list matches
 * ac_nir_create_gs_copy_shader() apart from the added export_primitive_id
 * flag and the absence of the GS output info.
 * NOTE(review): "legacy" presumably means the non-NGG path - confirm.
 */
void
ac_nir_lower_legacy_vs(nir_shader *nir,
enum amd_gfx_level gfx_level,
uint32_t clip_cull_mask,
const uint8_t *param_offsets,
bool has_param_exports,
bool export_primitive_id,
bool disable_streamout,
bool kill_pointsize,
bool kill_layer,
bool force_vrs);
/* Legacy GS lowering entry point. Takes two query flags and a non-const
 * ac_nir_gs_output_info pointer.
 * NOTE(review): whether output_info is only read or also written is not
 * visible here - confirm.
 */
void
ac_nir_lower_legacy_gs(nir_shader *nir,
bool has_gen_prim_query,
bool has_pipeline_stats_query,
ac_nir_gs_output_info *output_info);
/* Options passed (by const pointer) to ac_nir_lower_ps_early(). */
typedef struct {
/* This is a pre-link pass. It should only eliminate code and do lowering that mostly doesn't
* generate AMD-specific intrinsics.
*/
/* System values. */
bool force_persp_sample_interp;
bool force_linear_sample_interp;
bool force_persp_center_interp;
bool force_linear_center_interp;
unsigned ps_iter_samples;
/* Outputs. */
bool clamp_color; /* GL only */
bool alpha_test_alpha_to_one; /* GL only, this only affects alpha test */
enum compare_func alpha_func; /* GL only */
bool keep_alpha_for_mrtz; /* this prevents killing alpha based on spi_shader_col_format_hint */
unsigned spi_shader_col_format_hint; /* this only shrinks and eliminates output stores */
bool kill_z;
bool kill_stencil;
bool kill_samplemask;
} ac_nir_lower_ps_early_options;
/* Early pixel-shader lowering entry point; configured by a read-only
 * ac_nir_lower_ps_early_options, which documents it as a pre-link pass.
 */
void
ac_nir_lower_ps_early(nir_shader *nir, const ac_nir_lower_ps_early_options *options);
/* Options passed (by const pointer) to ac_nir_lower_ps_late(). */
typedef struct {
/* This is a post-link pass. It shouldn't eliminate any code and it shouldn't affect shader_info
* (those should be done in the early pass).
*/
enum amd_gfx_level gfx_level;
enum radeon_family family;
bool use_aco;
/* System values. */
bool bc_optimize_for_persp;
bool bc_optimize_for_linear;
/* Exports. */
bool uses_discard;
bool alpha_to_coverage_via_mrtz;
bool dual_src_blend_swizzle;
unsigned spi_shader_col_format;
/* NOTE(review): color_is_int8/color_is_int10 are unsigned, not bool -
 * presumably per-color-buffer bitmasks; confirm.
 */
unsigned color_is_int8;
unsigned color_is_int10;
bool alpha_to_one;
/* Vulkan only */
unsigned enable_mrt_output_nan_fixup;
bool no_color_export;
bool no_depth_export;
} ac_nir_lower_ps_late_options;
/* Late pixel-shader lowering entry point; configured by a read-only
 * ac_nir_lower_ps_late_options, which documents it as a post-link pass.
 */
void
ac_nir_lower_ps_late(nir_shader *nir, const ac_nir_lower_ps_late_options *options);
/* Options passed (by const pointer) to ac_nir_lower_tex(). */
typedef struct {
enum amd_gfx_level gfx_level;
/* If true, round the layer component of the coordinates source to the nearest
* integer for all array ops. This is always done for cube array ops.
*/
bool lower_array_layer_round_even;
/* Fix derivatives of constants and FS inputs in control flow.
*
* Ignores interpolateAtSample()/interpolateAtOffset(), dynamically indexed input loads,
* pervertexEXT input loads, textureGather() with implicit LOD and 16-bit derivatives and
* texture samples with nir_tex_src_min_lod.
*
* The layer must also be a constant or FS input.
*/
bool fix_derivs_in_divergent_cf;
/* NOTE(review): presumably a VGPR budget related to fix_derivs_in_divergent_cf
 * (WQM = whole quad mode) - confirm in the pass.
 */
unsigned max_wqm_vgprs;
} ac_nir_lower_tex_options;
/* Texture lowering entry point; configured by a read-only
 * ac_nir_lower_tex_options.
 * NOTE(review): the bool result presumably reports whether the shader was
 * changed - confirm.
 */
bool
ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options);
/* Builder helper taking a single nir_def named `uvec4`.
 * NOTE(review): the name suggests a 4-component unsigned vector emitted to a
 * debug log - confirm the expected component count and bit size.
 */
void
ac_nir_store_debug_log_amd(nir_builder *b, nir_def *uvec4);
/* Shader optimization entry point parameterized by the GFX level.
 * NOTE(review): the bool result presumably reports whether the shader was
 * changed - confirm.
 */
bool
ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level);
/* Returns an unsigned cost value for a producer/consumer shader pair.
 * NOTE(review): the cost unit and its consumer are not visible in this
 * header - confirm.
 */
unsigned
ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer);
/* Shader optimization entry point taking only the shader.
 * NOTE(review): the bool result presumably reports whether the shader was
 * changed - confirm.
 */
bool
ac_nir_opt_shared_append(nir_shader *shader);
/* Two memory-access pass entry points. Both take the GFX level and a use_llvm
 * flag; ac_nir_flag_smem_for_loads also takes an after_lowering flag.
 * NOTE(review): the bool results presumably report whether the shader was
 * changed - confirm.
 */
bool
ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm, bool after_lowering);
bool
ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm);
/* Shader optimization entry point taking only the shader.
 * NOTE(review): the bool result presumably reports whether the shader was
 * changed - confirm.
 */
bool
ac_nir_optimize_uniform_atomics(nir_shader *nir);
/* Callback functions. Each receives an opaque `data` pointer as its last
 * parameter.
 * NOTE(review): they are presumably meant to be handed to generic NIR passes
 * as function pointers, and the type `data` must point to is defined by the
 * implementation - confirm before passing anything.
 */

/* Returns an unsigned value for an instruction (presumably the bit size to
 * lower to, with 0 meaning "leave as is" - confirm).
 */
unsigned
ac_nir_lower_bit_size_callback(const nir_instr *instr, void *data);
/* Returns a bool for a candidate pair of memory intrinsics (`low`, `high`)
 * described by alignment, bit size, component count and hole size
 * (presumably true when the pair may be vectorized - confirm).
 */
bool
ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
unsigned num_components, int64_t hole_size,
nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data);
/* Returns a bool for an instruction; here `data` is const. */
bool
ac_nir_scalarize_overfetching_loads_callback(const nir_instr *instr, const void *data);
/* Returns a gl_access_qualifier value for the given (read-only) intrinsic
 * instruction.
 * NOTE(review): enum gl_access_qualifier is presumably a bitmask, so the
 * result may combine several flags - confirm.
 */
enum gl_access_qualifier
ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr);
#ifdef __cplusplus
}
#endif
#endif /* AC_NIR_H */