radeonsi: move most "info" fields from si_shader_selector into si_shader_info
It's where they should be, and future commits might require this.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14414>
This commit is contained in:
@@ -934,7 +934,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
||||
LLVMValueRef position[4] = {};
|
||||
unsigned pos_index = 0;
|
||||
unsigned clip_plane_enable = SI_NGG_CULL_GET_CLIP_PLANE_ENABLE(shader->key.ge.opt.ngg_culling);
|
||||
unsigned clipdist_enable = (sel->clipdist_mask & clip_plane_enable) | sel->culldist_mask;
|
||||
unsigned clipdist_enable = (sel->info.clipdist_mask & clip_plane_enable) | sel->info.culldist_mask;
|
||||
bool has_clipdist_mask = false;
|
||||
|
||||
for (unsigned i = 0; i < info->num_outputs; i++) {
|
||||
@@ -999,7 +999,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
||||
}
|
||||
}
|
||||
|
||||
if (clip_plane_enable && !sel->clipdist_mask) {
|
||||
if (clip_plane_enable && !sel->info.clipdist_mask) {
|
||||
/* When clip planes are enabled and there are no clip distance outputs,
|
||||
* we should use user clip planes and cull against the position.
|
||||
*/
|
||||
@@ -1337,7 +1337,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args.start_instance, 8 + SI_SGPR_START_INSTANCE);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args.vertex_buffers, 8 + GFX9_GS_NUM_USER_SGPR);
|
||||
|
||||
for (unsigned i = 0; i < shader->selector->num_vbos_in_user_sgprs; i++) {
|
||||
for (unsigned i = 0; i < shader->selector->info.num_vbos_in_user_sgprs; i++) {
|
||||
ret = si_insert_input_v4i32(ctx, ret, ctx->vb_descriptors[i],
|
||||
8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST + i * 4);
|
||||
}
|
||||
@@ -1349,8 +1349,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
||||
|
||||
unsigned vgpr;
|
||||
if (ctx->stage == MESA_SHADER_VERTEX) {
|
||||
if (shader->selector->num_vbos_in_user_sgprs) {
|
||||
vgpr = 8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->num_vbos_in_user_sgprs * 4;
|
||||
if (shader->selector->info.num_vbos_in_user_sgprs) {
|
||||
vgpr = 8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->info.num_vbos_in_user_sgprs * 4;
|
||||
} else {
|
||||
vgpr = 8 + GFX9_GS_NUM_USER_SGPR + 1;
|
||||
}
|
||||
@@ -1770,7 +1770,7 @@ void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LL
|
||||
LLVMBuildStore(builder, out_val, ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx));
|
||||
}
|
||||
}
|
||||
assert(out_idx * 4 == sel->gsvs_vertex_size);
|
||||
assert(out_idx * 4 == info->gsvs_vertex_size);
|
||||
|
||||
/* Determine and store whether this vertex completed a primitive. */
|
||||
const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
|
||||
@@ -2227,8 +2227,8 @@ retry_select_mode:
|
||||
max_out_verts_per_gsprim = gs_sel->info.base.gs.vertices_out;
|
||||
}
|
||||
|
||||
esvert_lds_size = es_sel->esgs_itemsize / 4;
|
||||
gsprim_lds_size = (gs_sel->gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
|
||||
esvert_lds_size = es_sel->info.esgs_itemsize / 4;
|
||||
gsprim_lds_size = (gs_sel->info.gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
|
||||
|
||||
if (gsprim_lds_size > target_lds_size && !force_multi_cycling) {
|
||||
if (gs_sel->tess_turns_off_ngg || es_sel->info.stage != MESA_SHADER_TESS_EVAL) {
|
||||
|
@@ -125,7 +125,7 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
|
||||
si_init_compiler(sscreen, compiler);
|
||||
|
||||
assert(program->ir_type == PIPE_SHADER_IR_NIR);
|
||||
si_nir_scan_shader(sel->nir, &sel->info);
|
||||
si_nir_scan_shader(sscreen, sel->nir, &sel->info);
|
||||
|
||||
si_get_active_slot_masks(&sel->info, &sel->active_const_and_shader_buffers,
|
||||
&sel->active_samplers_and_images);
|
||||
|
@@ -122,7 +122,6 @@ extern "C" {
|
||||
|
||||
#define SI_MAX_BORDER_COLORS 4096
|
||||
#define SI_MAX_VIEWPORTS 16
|
||||
#define SI_USER_CLIP_PLANE_MASK 0x3F
|
||||
#define SI_MAP_BUFFER_ALIGNMENT 64
|
||||
/* We only support the minimum allowed value (512), so that we can pack a 3D block size
|
||||
* in 1 SGPR. */
|
||||
@@ -1882,8 +1881,8 @@ static inline unsigned si_get_total_colormask(struct si_context *sctx)
|
||||
sctx->framebuffer.colorbuf_enabled_4bit & sctx->queued.named.blend->cb_target_mask;
|
||||
|
||||
if (!ps->info.color0_writes_all_cbufs)
|
||||
colormask &= ps->colors_written_4bit;
|
||||
else if (!ps->colors_written_4bit)
|
||||
colormask &= ps->info.colors_written_4bit;
|
||||
else if (!ps->info.colors_written_4bit)
|
||||
colormask = 0; /* color0 writes all cbufs, but it's not written */
|
||||
|
||||
return colormask;
|
||||
|
@@ -281,7 +281,7 @@ static void declare_vb_descriptor_input_sgprs(struct si_shader_context *ctx)
|
||||
{
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->args.vertex_buffers);
|
||||
|
||||
unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs;
|
||||
unsigned num_vbos_in_user_sgprs = ctx->shader->selector->info.num_vbos_in_user_sgprs;
|
||||
if (num_vbos_in_user_sgprs) {
|
||||
unsigned user_sgprs = ctx->args.num_sgprs_used;
|
||||
|
||||
@@ -496,14 +496,14 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
||||
|
||||
/* VS outputs passed via VGPRs to TCS. */
|
||||
if (shader->key.ge.opt.same_patch_vertices) {
|
||||
unsigned num_outputs = util_last_bit64(shader->selector->outputs_written);
|
||||
unsigned num_outputs = util_last_bit64(shader->selector->info.outputs_written);
|
||||
for (i = 0; i < num_outputs * 4; i++)
|
||||
ac_add_return(&ctx->args, AC_ARG_VGPR);
|
||||
}
|
||||
} else {
|
||||
/* TCS inputs are passed via VGPRs from VS. */
|
||||
if (shader->key.ge.opt.same_patch_vertices) {
|
||||
unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->outputs_written);
|
||||
unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->info.outputs_written);
|
||||
for (i = 0; i < num_inputs * 4; i++)
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
|
||||
}
|
||||
@@ -592,10 +592,10 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
||||
*/
|
||||
num_user_sgprs = GFX9_GS_NUM_USER_SGPR + 1;
|
||||
|
||||
if (shader->selector->num_vbos_in_user_sgprs) {
|
||||
if (shader->selector->info.num_vbos_in_user_sgprs) {
|
||||
assert(num_user_sgprs <= SI_SGPR_VS_VB_DESCRIPTOR_FIRST);
|
||||
num_user_sgprs =
|
||||
SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->num_vbos_in_user_sgprs * 4;
|
||||
SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->info.num_vbos_in_user_sgprs * 4;
|
||||
}
|
||||
} else {
|
||||
num_user_sgprs = GFX9_GS_NUM_USER_SGPR;
|
||||
@@ -1319,7 +1319,7 @@ bool si_vs_needs_prolog(const struct si_shader_selector *sel,
|
||||
|
||||
/* VGPR initialization fixup for Vega10 and Raven is always done in the
|
||||
* VS prolog. */
|
||||
return sel->vs_needs_prolog || prolog_key->ls_vgpr_fix ||
|
||||
return sel->info.vs_needs_prolog || prolog_key->ls_vgpr_fix ||
|
||||
/* The 2nd VS prolog loads input VGPRs from LDS */
|
||||
(key->ge.opt.ngg_culling && !ngg_cull_shader && !is_gs);
|
||||
}
|
||||
@@ -1575,7 +1575,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
|
||||
void si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir)
|
||||
{
|
||||
struct si_shader_info info;
|
||||
si_nir_scan_shader(nir, &info);
|
||||
si_nir_scan_shader(shader->selector->screen, nir, &info);
|
||||
|
||||
shader->info.uses_vmem_load_other |= info.uses_vmem_load_other;
|
||||
shader->info.uses_vmem_sampler_or_bvh |= info.uses_vmem_sampler_or_bvh;
|
||||
@@ -1915,7 +1915,7 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
|
||||
shader->info.uses_vmem_load_other = true;
|
||||
|
||||
if (info->colors_read) {
|
||||
ubyte *color = shader->selector->color_attr_index;
|
||||
ubyte *color = shader->selector->info.color_attr_index;
|
||||
|
||||
if (shader->key.ps.part.prolog.color_two_side) {
|
||||
/* BCOLORs are stored after the last input. */
|
||||
|
@@ -156,6 +156,7 @@ struct si_context;
|
||||
|
||||
#define SI_MAX_ATTRIBS 16
|
||||
#define SI_MAX_VS_OUTPUTS 40
|
||||
#define SI_USER_CLIP_PLANE_MASK 0x3F
|
||||
|
||||
#define SI_NGG_PRIM_EDGE_FLAG_BITS ((1 << 9) | (1 << 19) | (1 << 29))
|
||||
|
||||
@@ -362,18 +363,43 @@ struct si_shader_info {
|
||||
ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS];
|
||||
ubyte output_type[PIPE_MAX_SHADER_OUTPUTS]; /* enum nir_alu_type */
|
||||
|
||||
ubyte color_interpolate[2];
|
||||
ubyte color_interpolate_loc[2];
|
||||
|
||||
int constbuf0_num_slots;
|
||||
ubyte num_vs_inputs;
|
||||
ubyte num_vbos_in_user_sgprs;
|
||||
ubyte num_stream_output_components[4];
|
||||
uint16_t enabled_streamout_buffer_mask;
|
||||
|
||||
uint num_memory_stores;
|
||||
uint64_t inputs_read; /* "get_unique_index" bits */
|
||||
uint64_t tcs_vgpr_only_inputs; /* TCS inputs that are only in VGPRs, not LDS. */
|
||||
|
||||
uint64_t outputs_written_before_ps; /* "get_unique_index" bits */
|
||||
uint64_t outputs_written; /* "get_unique_index" bits */
|
||||
uint32_t patch_outputs_written; /* "get_unique_index_patch" bits */
|
||||
|
||||
ubyte clipdist_mask;
|
||||
ubyte culldist_mask;
|
||||
|
||||
uint16_t lshs_vertex_stride;
|
||||
uint16_t esgs_itemsize; /* vertex stride */
|
||||
uint16_t gsvs_vertex_size;
|
||||
ubyte gs_input_verts_per_prim;
|
||||
unsigned max_gsvs_emit_size;
|
||||
|
||||
/* PS parameters */
|
||||
unsigned db_shader_control;
|
||||
/* Set 0xf or 0x0 (4 bits) per each written output.
|
||||
* ANDed with spi_shader_col_format.
|
||||
*/
|
||||
unsigned colors_written_4bit;
|
||||
|
||||
int constbuf0_num_slots;
|
||||
uint num_memory_stores;
|
||||
ubyte color_attr_index[2];
|
||||
ubyte color_interpolate[2];
|
||||
ubyte color_interpolate_loc[2];
|
||||
ubyte colors_read; /**< which color components are read by the FS */
|
||||
ubyte colors_written;
|
||||
uint16_t output_color_types; /**< Each bit pair is enum si_color_output_type */
|
||||
bool vs_needs_prolog;
|
||||
bool color0_writes_all_cbufs; /**< gl_FragColor */
|
||||
bool reads_samplemask; /**< does fragment shader read sample mask? */
|
||||
bool reads_tess_factors; /**< If TES reads TESSINNER or TESSOUTER */
|
||||
@@ -465,44 +491,17 @@ struct si_shader_selector {
|
||||
enum pipe_shader_type pipe_shader_type;
|
||||
ubyte const_and_shader_buf_descriptors_index;
|
||||
ubyte sampler_and_images_descriptors_index;
|
||||
bool vs_needs_prolog;
|
||||
ubyte cs_shaderbufs_sgpr_index;
|
||||
ubyte cs_num_shaderbufs_in_user_sgprs;
|
||||
ubyte cs_images_sgpr_index;
|
||||
ubyte cs_images_num_sgprs;
|
||||
ubyte cs_num_images_in_user_sgprs;
|
||||
ubyte num_vs_inputs;
|
||||
ubyte num_vbos_in_user_sgprs;
|
||||
unsigned ngg_cull_vert_threshold; /* UINT32_MAX = disabled */
|
||||
ubyte clipdist_mask;
|
||||
ubyte culldist_mask;
|
||||
enum pipe_prim_type rast_prim;
|
||||
|
||||
/* ES parameters. */
|
||||
uint16_t esgs_itemsize; /* vertex stride */
|
||||
uint16_t lshs_vertex_stride;
|
||||
|
||||
/* GS parameters. */
|
||||
uint16_t gsvs_vertex_size;
|
||||
ubyte gs_input_verts_per_prim;
|
||||
unsigned max_gsvs_emit_size;
|
||||
bool tess_turns_off_ngg;
|
||||
|
||||
/* PS parameters. */
|
||||
ubyte color_attr_index[2];
|
||||
unsigned db_shader_control;
|
||||
/* Set 0xf or 0x0 (4 bits) per each written output.
|
||||
* ANDed with spi_shader_col_format.
|
||||
*/
|
||||
unsigned colors_written_4bit;
|
||||
|
||||
uint64_t outputs_written_before_ps; /* "get_unique_index" bits */
|
||||
uint64_t outputs_written; /* "get_unique_index" bits */
|
||||
uint32_t patch_outputs_written; /* "get_unique_index_patch" bits */
|
||||
|
||||
uint64_t inputs_read; /* "get_unique_index" bits */
|
||||
uint64_t tcs_vgpr_only_inputs; /* TCS inputs that are only in VGPRs, not LDS. */
|
||||
|
||||
/* bitmasks of used descriptor slots */
|
||||
uint64_t active_const_and_shader_buffers;
|
||||
uint64_t active_samplers_and_images;
|
||||
@@ -952,7 +951,8 @@ const char *si_get_shader_name(const struct si_shader *shader);
|
||||
void si_shader_binary_clean(struct si_shader_binary *binary);
|
||||
|
||||
/* si_shader_info.c */
|
||||
void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *info);
|
||||
void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
|
||||
struct si_shader_info *info);
|
||||
|
||||
/* si_shader_llvm_gs.c */
|
||||
struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
|
||||
|
@@ -22,8 +22,10 @@
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "si_shader.h"
|
||||
#include "si_pipe.h"
|
||||
#include "util/mesa-sha1.h"
|
||||
#include "util/u_prim.h"
|
||||
#include "sid.h"
|
||||
|
||||
|
||||
struct si_shader_profile {
|
||||
@@ -580,7 +582,8 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
|
||||
}
|
||||
}
|
||||
|
||||
void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *info)
|
||||
void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
|
||||
struct si_shader_info *info)
|
||||
{
|
||||
memset(info, 0, sizeof(*info));
|
||||
info->base = nir->info;
|
||||
@@ -729,4 +732,155 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
|
||||
info->output_readmask[i] &= info->output_usagemask[i];
|
||||
|
||||
info->has_divergent_loop = nir_has_divergent_loop((nir_shader*)nir);
|
||||
|
||||
if (info->stage == MESA_SHADER_VERTEX ||
|
||||
info->stage == MESA_SHADER_TESS_CTRL ||
|
||||
info->stage == MESA_SHADER_TESS_EVAL ||
|
||||
info->stage == MESA_SHADER_GEOMETRY) {
|
||||
if (info->stage == MESA_SHADER_TESS_CTRL) {
|
||||
/* Always reserve space for these. */
|
||||
info->patch_outputs_written |=
|
||||
(1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER)) |
|
||||
(1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER));
|
||||
}
|
||||
for (unsigned i = 0; i < info->num_outputs; i++) {
|
||||
unsigned semantic = info->output_semantic[i];
|
||||
|
||||
if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
|
||||
semantic == VARYING_SLOT_TESS_LEVEL_OUTER ||
|
||||
(semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX)) {
|
||||
info->patch_outputs_written |= 1ull << si_shader_io_get_unique_index_patch(semantic);
|
||||
} else if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
|
||||
semantic != VARYING_SLOT_EDGE) {
|
||||
info->outputs_written |= 1ull << si_shader_io_get_unique_index(semantic, false);
|
||||
|
||||
/* Ignore outputs that are not passed from VS to PS. */
|
||||
if (semantic != VARYING_SLOT_POS &&
|
||||
semantic != VARYING_SLOT_PSIZ &&
|
||||
semantic != VARYING_SLOT_CLIP_VERTEX) {
|
||||
info->outputs_written_before_ps |= 1ull
|
||||
<< si_shader_io_get_unique_index(semantic, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
info->num_vs_inputs =
|
||||
info->stage == MESA_SHADER_VERTEX && !info->base.vs.blit_sgprs_amd ? info->num_inputs : 0;
|
||||
unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.chip_class);
|
||||
info->num_vbos_in_user_sgprs = MIN2(info->num_vs_inputs, num_vbos_in_sgprs);
|
||||
|
||||
/* The prolog is a no-op if there are no inputs. */
|
||||
info->vs_needs_prolog = info->num_inputs && !info->base.vs.blit_sgprs_amd;
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX ||
|
||||
nir->info.stage == MESA_SHADER_TESS_CTRL ||
|
||||
nir->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
info->esgs_itemsize = util_last_bit64(info->outputs_written) * 16;
|
||||
info->lshs_vertex_stride = info->esgs_itemsize;
|
||||
|
||||
/* Add 1 dword to reduce LDS bank conflicts, so that each vertex
|
||||
* will start on a different bank. (except for the maximum 32*16).
|
||||
*/
|
||||
if (info->lshs_vertex_stride < 32 * 16)
|
||||
info->lshs_vertex_stride += 4;
|
||||
|
||||
/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
|
||||
* conflicts, i.e. each vertex will start at a different bank.
|
||||
*/
|
||||
if (sscreen->info.chip_class >= GFX9)
|
||||
info->esgs_itemsize += 4;
|
||||
|
||||
assert(((info->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0);
|
||||
|
||||
info->tcs_vgpr_only_inputs = ~info->base.tess.tcs_cross_invocation_inputs_read &
|
||||
~info->base.inputs_read_indirectly &
|
||||
info->base.inputs_read;
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
info->gsvs_vertex_size = info->num_outputs * 16;
|
||||
info->max_gsvs_emit_size = info->gsvs_vertex_size * info->base.gs.vertices_out;
|
||||
info->gs_input_verts_per_prim =
|
||||
u_vertices_per_prim((enum pipe_prim_type)info->base.gs.input_primitive);
|
||||
}
|
||||
|
||||
info->clipdist_mask = info->writes_clipvertex ? SI_USER_CLIP_PLANE_MASK :
|
||||
u_bit_consecutive(0, info->base.clip_distance_array_size);
|
||||
info->culldist_mask = u_bit_consecutive(0, info->base.cull_distance_array_size) <<
|
||||
info->base.clip_distance_array_size;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
for (unsigned i = 0; i < info->num_inputs; i++) {
|
||||
unsigned semantic = info->input[i].semantic;
|
||||
|
||||
if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
|
||||
semantic != VARYING_SLOT_PNTC) {
|
||||
info->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic, true);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
if (info->colors_written & (1 << i))
|
||||
info->colors_written_4bit |= 0xf << (4 * i);
|
||||
|
||||
for (unsigned i = 0; i < info->num_inputs; i++) {
|
||||
if (info->input[i].semantic == VARYING_SLOT_COL0)
|
||||
info->color_attr_index[0] = i;
|
||||
else if (info->input[i].semantic == VARYING_SLOT_COL1)
|
||||
info->color_attr_index[1] = i;
|
||||
}
|
||||
|
||||
/* DB_SHADER_CONTROL */
|
||||
info->db_shader_control = S_02880C_Z_EXPORT_ENABLE(info->writes_z) |
|
||||
S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->writes_stencil) |
|
||||
S_02880C_MASK_EXPORT_ENABLE(info->writes_samplemask) |
|
||||
S_02880C_KILL_ENABLE(info->base.fs.uses_discard);
|
||||
|
||||
switch (info->base.fs.depth_layout) {
|
||||
case FRAG_DEPTH_LAYOUT_GREATER:
|
||||
info->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
|
||||
break;
|
||||
case FRAG_DEPTH_LAYOUT_LESS:
|
||||
info->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
|
||||
break;
|
||||
default:;
|
||||
}
|
||||
|
||||
/* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as following:
|
||||
*
|
||||
* | early Z/S | writes_mem | allow_ReZ? | Z_ORDER | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP
|
||||
* --|-----------|------------|------------|--------------------|-------------------|-------------
|
||||
* 1a| false | false | true | EarlyZ_Then_ReZ | 0 | 0
|
||||
* 1b| false | false | false | EarlyZ_Then_LateZ | 0 | 0
|
||||
* 2 | false | true | n/a | LateZ | 1 | 0
|
||||
* 3 | true | false | n/a | EarlyZ_Then_LateZ | 0 | 0
|
||||
* 4 | true | true | n/a | EarlyZ_Then_LateZ | 0 | 1
|
||||
*
|
||||
* In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register.
|
||||
* In case 2, NOOP_CULL is a don't care field. In case 2, 3 and 4, ReZ doesn't make sense.
|
||||
*
|
||||
* Don't use ReZ without profiling !!!
|
||||
*
|
||||
* ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex
|
||||
* shaders.
|
||||
*/
|
||||
if (info->base.fs.early_fragment_tests) {
|
||||
/* Cases 3, 4. */
|
||||
info->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) |
|
||||
S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
|
||||
S_02880C_EXEC_ON_NOOP(info->base.writes_memory);
|
||||
} else if (info->base.writes_memory) {
|
||||
/* Case 2. */
|
||||
info->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) | S_02880C_EXEC_ON_HIER_FAIL(1);
|
||||
} else {
|
||||
/* Case 1. */
|
||||
info->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
|
||||
}
|
||||
|
||||
if (info->base.fs.post_depth_coverage)
|
||||
info->db_shader_control |= S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(1);
|
||||
}
|
||||
}
|
||||
|
@@ -1027,7 +1027,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
||||
/* We need the barrier only if TCS inputs are read from LDS. */
|
||||
if (!shader->key.ge.opt.same_patch_vertices ||
|
||||
shader->selector->info.base.inputs_read &
|
||||
~shader->selector->tcs_vgpr_only_inputs)
|
||||
~shader->selector->info.tcs_vgpr_only_inputs)
|
||||
ac_build_s_barrier(&ctx->ac);
|
||||
} else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
|
||||
/* gfx10_ngg_gs_emit_prologue inserts the barrier for NGG. */
|
||||
|
@@ -140,7 +140,7 @@ void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi)
|
||||
int i;
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9 && info->num_outputs) {
|
||||
unsigned itemsize_dw = es->selector->esgs_itemsize / 4;
|
||||
unsigned itemsize_dw = es->selector->info.esgs_itemsize / 4;
|
||||
LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
|
||||
LLVMValueRef wave_idx = si_unpack_param(ctx, ctx->args.merged_wave_info, 24, 4);
|
||||
vertex_idx =
|
||||
|
@@ -74,7 +74,7 @@ static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *
|
||||
if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
|
||||
return util_last_bit64(ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) * 4;
|
||||
|
||||
return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
|
||||
return util_last_bit64(ctx->shader->selector->info.outputs_written) * 4;
|
||||
}
|
||||
|
||||
static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
|
||||
@@ -92,7 +92,7 @@ static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
|
||||
const struct si_shader_info *info = &ctx->shader->selector->info;
|
||||
unsigned tcs_out_vertices = info->base.tess.tcs_vertices_out;
|
||||
unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
|
||||
unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written);
|
||||
unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->info.patch_outputs_written);
|
||||
unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride + num_patch_outputs * 4;
|
||||
return LLVMConstInt(ctx->ac.i32, patch_dw_stride, 0);
|
||||
}
|
||||
@@ -155,12 +155,12 @@ static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
|
||||
|
||||
switch (ctx->stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
stride = ctx->shader->selector->lshs_vertex_stride / 4;
|
||||
stride = ctx->shader->selector->info.lshs_vertex_stride / 4;
|
||||
return LLVMConstInt(ctx->ac.i32, stride, 0);
|
||||
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) {
|
||||
stride = ctx->shader->key.ge.part.tcs.ls->lshs_vertex_stride / 4;
|
||||
stride = ctx->shader->key.ge.part.tcs.ls->info.lshs_vertex_stride / 4;
|
||||
return LLVMConstInt(ctx->ac.i32, stride, 0);
|
||||
}
|
||||
return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);
|
||||
@@ -980,7 +980,7 @@ void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi)
|
||||
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
|
||||
|
||||
if (!shader->key.ge.opt.same_patch_vertices ||
|
||||
!(ctx->next_shader_sel->tcs_vgpr_only_inputs & (1ull << semantic)))
|
||||
!(ctx->next_shader_sel->info.tcs_vgpr_only_inputs & (1ull << semantic)))
|
||||
lshs_lds_store(ctx, chan, dw_addr, value);
|
||||
|
||||
if (shader->key.ge.opt.same_patch_vertices) {
|
||||
|
@@ -111,7 +111,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
|
||||
unsigned bit_size = info->input[input_index].fp16_lo_hi_valid & 0x1 ? 16 : 32;
|
||||
LLVMTypeRef int_type = bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32;
|
||||
LLVMTypeRef float_type = bit_size == 16 ? ctx->ac.f16 : ctx->ac.f32;
|
||||
unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs;
|
||||
unsigned num_vbos_in_user_sgprs = ctx->shader->selector->info.num_vbos_in_user_sgprs;
|
||||
union si_vs_fix_fetch fix_fetch;
|
||||
LLVMValueRef vb_desc;
|
||||
LLVMValueRef vertex_index;
|
||||
@@ -391,7 +391,7 @@ void si_llvm_clipvertex_to_clipdist(struct si_shader_context *ctx,
|
||||
LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings);
|
||||
LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0);
|
||||
LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);
|
||||
unsigned clipdist_mask = ctx->shader->selector->clipdist_mask &
|
||||
unsigned clipdist_mask = ctx->shader->selector->info.clipdist_mask &
|
||||
~ctx->shader->key.ge.opt.kill_clip_distances;
|
||||
|
||||
for (reg_index = 0; reg_index < 2; reg_index++) {
|
||||
@@ -569,9 +569,9 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
|
||||
LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL,
|
||||
viewport_index_value = NULL;
|
||||
unsigned pos_idx, index;
|
||||
unsigned clipdist_mask = (shader->selector->clipdist_mask &
|
||||
unsigned clipdist_mask = (shader->selector->info.clipdist_mask &
|
||||
~shader->key.ge.opt.kill_clip_distances) |
|
||||
shader->selector->culldist_mask;
|
||||
shader->selector->info.culldist_mask;
|
||||
int i;
|
||||
|
||||
si_vertex_color_clamping(ctx, outputs, noutput);
|
||||
|
@@ -834,9 +834,9 @@ static void si_emit_clip_regs(struct si_context *sctx)
|
||||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||
bool window_space = info->stage == MESA_SHADER_VERTEX ?
|
||||
info->base.vs.window_space_position : 0;
|
||||
unsigned clipdist_mask = vs_sel->clipdist_mask;
|
||||
unsigned clipdist_mask = vs_sel->info.clipdist_mask;
|
||||
unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SI_USER_CLIP_PLANE_MASK;
|
||||
unsigned culldist_mask = vs_sel->culldist_mask;
|
||||
unsigned culldist_mask = vs_sel->info.culldist_mask;
|
||||
|
||||
/* Clip distances on points have no effect, so need to be implemented
|
||||
* as cull distances. This applies for the clipvertex case as well.
|
||||
|
@@ -559,13 +559,13 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
|
||||
|
||||
/* This calculates how shader inputs and outputs among VS, TCS, and TES
|
||||
* are laid out in LDS. */
|
||||
unsigned num_tcs_inputs = util_last_bit64(ls->outputs_written);
|
||||
unsigned num_tcs_inputs = util_last_bit64(ls->info.outputs_written);
|
||||
unsigned num_tcs_output_cp, num_tcs_outputs, num_tcs_patch_outputs;
|
||||
|
||||
if (sctx->shader.tcs.cso) {
|
||||
num_tcs_outputs = util_last_bit64(tcs->outputs_written);
|
||||
num_tcs_outputs = util_last_bit64(tcs->info.outputs_written);
|
||||
num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out;
|
||||
num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
|
||||
num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);
|
||||
} else {
|
||||
/* No TCS. Route varyings from LS to TES. */
|
||||
num_tcs_outputs = num_tcs_inputs;
|
||||
@@ -573,13 +573,13 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
|
||||
num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
|
||||
}
|
||||
|
||||
unsigned input_vertex_size = ls->lshs_vertex_stride;
|
||||
unsigned input_vertex_size = ls->info.lshs_vertex_stride;
|
||||
unsigned output_vertex_size = num_tcs_outputs * 16;
|
||||
unsigned input_patch_size;
|
||||
|
||||
/* Allocate LDS for TCS inputs only if it's used. */
|
||||
if (!ls_current->key.ge.opt.same_patch_vertices ||
|
||||
tcs->info.base.inputs_read & ~tcs->tcs_vgpr_only_inputs)
|
||||
tcs->info.base.inputs_read & ~tcs->info.tcs_vgpr_only_inputs)
|
||||
input_patch_size = num_tcs_input_cp * input_vertex_size;
|
||||
else
|
||||
input_patch_size = 0;
|
||||
@@ -2112,8 +2112,8 @@ static void si_draw(struct pipe_context *ctx,
|
||||
struct si_shader_selector *vs = sctx->shader.vs.cso;
|
||||
struct si_vertex_state *vstate = (struct si_vertex_state *)state;
|
||||
if (unlikely(!vs ||
|
||||
(!IS_DRAW_VERTEX_STATE && sctx->num_vertex_elements < vs->num_vs_inputs) ||
|
||||
(IS_DRAW_VERTEX_STATE && vstate->velems.count < vs->num_vs_inputs) ||
|
||||
(!IS_DRAW_VERTEX_STATE && sctx->num_vertex_elements < vs->info.num_vs_inputs) ||
|
||||
(IS_DRAW_VERTEX_STATE && vstate->velems.count < vs->info.num_vs_inputs) ||
|
||||
!sctx->shader.ps.cso || (HAS_TESS != (prim == PIPE_PRIM_PATCHES)))) {
|
||||
assert(0);
|
||||
return;
|
||||
|
@@ -614,7 +614,7 @@ static unsigned si_get_num_vs_user_sgprs(struct si_shader *shader,
|
||||
{
|
||||
struct si_shader_selector *vs =
|
||||
shader->previous_stage_sel ? shader->previous_stage_sel : shader->selector;
|
||||
unsigned num_vbos_in_user_sgprs = vs->num_vbos_in_user_sgprs;
|
||||
unsigned num_vbos_in_user_sgprs = vs->info.num_vbos_in_user_sgprs;
|
||||
|
||||
/* 1 SGPR is reserved for the vertex buffer pointer. */
|
||||
assert(num_always_on_user_sgprs <= SI_SGPR_VS_VB_DESCRIPTOR_FIRST - 1);
|
||||
@@ -744,7 +744,7 @@ static void si_emit_shader_es(struct si_context *sctx)
|
||||
radeon_begin(&sctx->gfx_cs);
|
||||
radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
|
||||
SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
|
||||
shader->selector->esgs_itemsize / 4);
|
||||
shader->selector->info.esgs_itemsize / 4);
|
||||
|
||||
if (shader->selector->info.stage == MESA_SHADER_TESS_EVAL)
|
||||
radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM,
|
||||
@@ -815,7 +815,7 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *
|
||||
/* We can't allow using the whole LDS, because GS waves compete with
|
||||
* other shader stages for LDS space. */
|
||||
const unsigned max_lds_size = 8 * 1024;
|
||||
const unsigned esgs_itemsize = es->esgs_itemsize / 4;
|
||||
const unsigned esgs_itemsize = es->info.esgs_itemsize / 4;
|
||||
unsigned esgs_lds_size;
|
||||
|
||||
/* All these are per subgroup: */
|
||||
@@ -842,7 +842,7 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *
|
||||
/* If the primitive has adjacency, halve the number of vertices
|
||||
* that will be reused in multiple primitives.
|
||||
*/
|
||||
min_es_verts = gs->gs_input_verts_per_prim / (uses_adjacency ? 2 : 1);
|
||||
min_es_verts = gs->info.gs_input_verts_per_prim / (uses_adjacency ? 2 : 1);
|
||||
|
||||
gs_prims = MIN2(ideal_gs_prims, max_gs_prims);
|
||||
worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);
|
||||
@@ -877,7 +877,7 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *
|
||||
/* Vertices for adjacency primitives are not always reused, so restore
|
||||
* it for ES_VERTS_PER_SUBGRP.
|
||||
*/
|
||||
min_es_verts = gs->gs_input_verts_per_prim;
|
||||
min_es_verts = gs->info.gs_input_verts_per_prim;
|
||||
|
||||
/* For normal primitives, the VGT only checks if they are past the ES
|
||||
* verts per subgroup after allocating a full GS primitive and if they
|
||||
@@ -1105,7 +1105,7 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
||||
S_028A44_GS_INST_PRIMS_IN_SUBGRP(shader->gs_info.gs_inst_prims_in_subgroup);
|
||||
shader->ctx_reg.gs.vgt_gs_max_prims_per_subgroup =
|
||||
S_028A94_MAX_PRIMS_PER_SUBGROUP(shader->gs_info.max_prims_per_subgroup);
|
||||
shader->ctx_reg.gs.vgt_esgs_ring_itemsize = shader->key.ge.part.gs.es->esgs_itemsize / 4;
|
||||
shader->ctx_reg.gs.vgt_esgs_ring_itemsize = shader->key.ge.part.gs.es->info.esgs_itemsize / 4;
|
||||
|
||||
if (es_stage == MESA_SHADER_TESS_EVAL)
|
||||
si_set_tesseval_regs(sscreen, shader->key.ge.part.gs.es, shader);
|
||||
@@ -1286,8 +1286,8 @@ static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel,
|
||||
const struct si_shader *shader, bool ngg)
|
||||
{
|
||||
/* Clip distances can be killed, but cull distances can't. */
|
||||
unsigned clipcull_mask = (sel->clipdist_mask & ~shader->key.ge.opt.kill_clip_distances) |
|
||||
sel->culldist_mask;
|
||||
unsigned clipcull_mask = (sel->info.clipdist_mask & ~shader->key.ge.opt.kill_clip_distances) |
|
||||
sel->info.culldist_mask;
|
||||
bool writes_psize = sel->info.writes_psize && !shader->key.ge.opt.kill_pointsize;
|
||||
bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) ||
|
||||
sel->screen->options.vrs2x2 ||
|
||||
@@ -1427,7 +1427,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
||||
gs_sel->info.writes_primid);
|
||||
|
||||
if (gs_stage == MESA_SHADER_GEOMETRY) {
|
||||
shader->ctx_reg.ngg.vgt_esgs_ring_itemsize = es_sel->esgs_itemsize / 4;
|
||||
shader->ctx_reg.ngg.vgt_esgs_ring_itemsize = es_sel->info.esgs_itemsize / 4;
|
||||
shader->ctx_reg.ngg.vgt_gs_max_vert_out = gs_sel->info.base.gs.vertices_out;
|
||||
} else {
|
||||
shader->ctx_reg.ngg.vgt_esgs_ring_itemsize = 1;
|
||||
@@ -2071,16 +2071,16 @@ void si_update_ps_inputs_read_or_disabled(struct si_context *sctx)
|
||||
(!ps_colormask && !ps_modifies_zs && !ps->info.base.writes_memory);
|
||||
}
|
||||
|
||||
sctx->ps_inputs_read_or_disabled = ps_disabled ? 0 : ps->inputs_read;
|
||||
sctx->ps_inputs_read_or_disabled = ps_disabled ? 0 : ps->info.inputs_read;
|
||||
}
|
||||
|
||||
static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs,
|
||||
union si_shader_key *key)
|
||||
{
|
||||
key->ge.opt.kill_clip_distances = vs->clipdist_mask & ~sctx->queued.named.rasterizer->clip_plane_enable;
|
||||
key->ge.opt.kill_clip_distances = vs->info.clipdist_mask & ~sctx->queued.named.rasterizer->clip_plane_enable;
|
||||
|
||||
/* Find out which VS outputs aren't used by the PS. */
|
||||
uint64_t outputs_written = vs->outputs_written_before_ps;
|
||||
uint64_t outputs_written = vs->info.outputs_written_before_ps;
|
||||
uint64_t linked = outputs_written & sctx->ps_inputs_read_or_disabled;
|
||||
|
||||
key->ge.opt.kill_outputs = ~linked & outputs_written;
|
||||
@@ -2185,7 +2185,7 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
|
||||
|
||||
/* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
|
||||
if (!key->ps.part.epilog.last_cbuf) {
|
||||
key->ps.part.epilog.spi_shader_col_format &= sel->colors_written_4bit;
|
||||
key->ps.part.epilog.spi_shader_col_format &= sel->info.colors_written_4bit;
|
||||
key->ps.part.epilog.color_is_int8 &= sel->info.colors_written;
|
||||
key->ps.part.epilog.color_is_int10 &= sel->info.colors_written;
|
||||
}
|
||||
@@ -2196,7 +2196,7 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
|
||||
*
|
||||
* Dual source blending never has color buffer 1 enabled, so ignore it.
|
||||
*/
|
||||
if (sel->colors_written_4bit &
|
||||
if (sel->info.colors_written_4bit &
|
||||
(blend->dual_src_blend ? 0xffffff0f : 0xffffffff) &
|
||||
~(sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_enabled_4bit))
|
||||
key->ps.opt.prefer_mono = 1;
|
||||
@@ -2944,7 +2944,7 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
|
||||
semantic != VARYING_SLOT_CLIP_VERTEX &&
|
||||
semantic != VARYING_SLOT_EDGE) {
|
||||
id = si_shader_io_get_unique_index(semantic, true);
|
||||
sel->outputs_written_before_ps &= ~(1ull << id);
|
||||
sel->info.outputs_written_before_ps &= ~(1ull << id);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3024,7 +3024,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
||||
struct si_screen *sscreen = (struct si_screen *)ctx->screen;
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
|
||||
int i;
|
||||
|
||||
if (!sel)
|
||||
return NULL;
|
||||
@@ -3040,7 +3039,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
||||
sel->nir = (nir_shader*)state->ir.nir;
|
||||
}
|
||||
|
||||
si_nir_scan_shader(sel->nir, &sel->info);
|
||||
si_nir_scan_shader(sscreen, sel->nir, &sel->info);
|
||||
|
||||
const enum pipe_shader_type type = pipe_shader_type_from_mesa(sel->info.stage);
|
||||
sel->pipe_shader_type = type;
|
||||
@@ -3053,49 +3052,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
||||
si_get_active_slot_masks(&sel->info, &sel->active_const_and_shader_buffers,
|
||||
&sel->active_samplers_and_images);
|
||||
|
||||
sel->num_vs_inputs =
|
||||
sel->info.stage == MESA_SHADER_VERTEX && !sel->info.base.vs.blit_sgprs_amd
|
||||
? sel->info.num_inputs
|
||||
: 0;
|
||||
unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.chip_class);
|
||||
sel->num_vbos_in_user_sgprs = MIN2(sel->num_vs_inputs, num_vbos_in_sgprs);
|
||||
|
||||
/* The prolog is a no-op if there are no inputs. */
|
||||
sel->vs_needs_prolog = sel->info.stage == MESA_SHADER_VERTEX && sel->info.num_inputs &&
|
||||
!sel->info.base.vs.blit_sgprs_amd;
|
||||
|
||||
if (sel->info.stage == MESA_SHADER_VERTEX ||
|
||||
sel->info.stage == MESA_SHADER_TESS_CTRL ||
|
||||
sel->info.stage == MESA_SHADER_TESS_EVAL ||
|
||||
sel->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
if (sel->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
/* Always reserve space for these. */
|
||||
sel->patch_outputs_written |=
|
||||
(1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER)) |
|
||||
(1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER));
|
||||
}
|
||||
for (i = 0; i < sel->info.num_outputs; i++) {
|
||||
unsigned semantic = sel->info.output_semantic[i];
|
||||
|
||||
if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
|
||||
semantic == VARYING_SLOT_TESS_LEVEL_OUTER ||
|
||||
(semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX)) {
|
||||
sel->patch_outputs_written |= 1ull << si_shader_io_get_unique_index_patch(semantic);
|
||||
} else if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
|
||||
semantic != VARYING_SLOT_EDGE) {
|
||||
sel->outputs_written |= 1ull << si_shader_io_get_unique_index(semantic, false);
|
||||
|
||||
/* Ignore outputs that are not passed from VS to PS. */
|
||||
if (semantic != VARYING_SLOT_POS &&
|
||||
semantic != VARYING_SLOT_PSIZ &&
|
||||
semantic != VARYING_SLOT_CLIP_VERTEX) {
|
||||
sel->outputs_written_before_ps |= 1ull
|
||||
<< si_shader_io_get_unique_index(semantic, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (sel->info.stage) {
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
/* Only possibilities: POINTS, LINE_STRIP, TRIANGLES */
|
||||
@@ -3103,11 +3059,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
||||
if (util_rast_prim_is_triangles(sel->rast_prim))
|
||||
sel->rast_prim = PIPE_PRIM_TRIANGLES;
|
||||
|
||||
sel->gsvs_vertex_size = sel->info.num_outputs * 16;
|
||||
sel->max_gsvs_emit_size = sel->gsvs_vertex_size * sel->info.base.gs.vertices_out;
|
||||
sel->gs_input_verts_per_prim =
|
||||
u_vertices_per_prim((enum pipe_prim_type)sel->info.base.gs.input_primitive);
|
||||
|
||||
/* EN_MAX_VERT_OUT_PER_GS_INSTANCE does not work with tesselation so
|
||||
* we can't split workgroups. Disable ngg if any of the following conditions is true:
|
||||
* - num_invocations * gs.vertices_out > 256
|
||||
@@ -3120,30 +3071,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
||||
break;
|
||||
|
||||
case MESA_SHADER_VERTEX:
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
|
||||
sel->lshs_vertex_stride = sel->esgs_itemsize;
|
||||
|
||||
/* Add 1 dword to reduce LDS bank conflicts, so that each vertex
|
||||
* will start on a different bank. (except for the maximum 32*16).
|
||||
*/
|
||||
if (sel->lshs_vertex_stride < 32 * 16)
|
||||
sel->lshs_vertex_stride += 4;
|
||||
|
||||
/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
|
||||
* conflicts, i.e. each vertex will start at a different bank.
|
||||
*/
|
||||
if (sctx->chip_class >= GFX9)
|
||||
sel->esgs_itemsize += 4;
|
||||
|
||||
assert(((sel->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0);
|
||||
|
||||
sel->tcs_vgpr_only_inputs = ~sel->info.base.tess.tcs_cross_invocation_inputs_read &
|
||||
~sel->info.base.inputs_read_indirectly &
|
||||
sel->info.base.inputs_read;
|
||||
|
||||
/* Only for TES: */
|
||||
if (sel->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
if (sel->info.base.tess.point_mode)
|
||||
sel->rast_prim = PIPE_PRIM_POINTS;
|
||||
@@ -3155,28 +3083,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
||||
sel->rast_prim = PIPE_PRIM_TRIANGLES;
|
||||
}
|
||||
break;
|
||||
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
for (i = 0; i < sel->info.num_inputs; i++) {
|
||||
unsigned semantic = sel->info.input[i].semantic;
|
||||
|
||||
if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
|
||||
semantic != VARYING_SLOT_PNTC) {
|
||||
sel->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic, true);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
if (sel->info.colors_written & (1 << i))
|
||||
sel->colors_written_4bit |= 0xf << (4 * i);
|
||||
|
||||
for (i = 0; i < sel->info.num_inputs; i++) {
|
||||
if (sel->info.input[i].semantic == VARYING_SLOT_COL0)
|
||||
sel->color_attr_index[0] = i;
|
||||
else if (sel->info.input[i].semantic == VARYING_SLOT_COL1)
|
||||
sel->color_attr_index[1] = i;
|
||||
}
|
||||
break;
|
||||
default:;
|
||||
}
|
||||
|
||||
@@ -3208,63 +3114,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
||||
}
|
||||
}
|
||||
|
||||
sel->clipdist_mask = sel->info.writes_clipvertex ? SI_USER_CLIP_PLANE_MASK :
|
||||
u_bit_consecutive(0, sel->info.base.clip_distance_array_size);
|
||||
sel->culldist_mask = u_bit_consecutive(0, sel->info.base.cull_distance_array_size) <<
|
||||
sel->info.base.clip_distance_array_size;
|
||||
|
||||
/* DB_SHADER_CONTROL */
|
||||
sel->db_shader_control = S_02880C_Z_EXPORT_ENABLE(sel->info.writes_z) |
|
||||
S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(sel->info.writes_stencil) |
|
||||
S_02880C_MASK_EXPORT_ENABLE(sel->info.writes_samplemask) |
|
||||
S_02880C_KILL_ENABLE(sel->info.base.fs.uses_discard);
|
||||
|
||||
if (sel->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
switch (sel->info.base.fs.depth_layout) {
|
||||
case FRAG_DEPTH_LAYOUT_GREATER:
|
||||
sel->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
|
||||
break;
|
||||
case FRAG_DEPTH_LAYOUT_LESS:
|
||||
sel->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
|
||||
break;
|
||||
default:;
|
||||
}
|
||||
|
||||
/* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as following:
|
||||
*
|
||||
* | early Z/S | writes_mem | allow_ReZ? | Z_ORDER | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP
|
||||
* --|-----------|------------|------------|--------------------|-------------------|-------------
|
||||
* 1a| false | false | true | EarlyZ_Then_ReZ | 0 | 0
|
||||
* 1b| false | false | false | EarlyZ_Then_LateZ | 0 | 0
|
||||
* 2 | false | true | n/a | LateZ | 1 | 0
|
||||
* 3 | true | false | n/a | EarlyZ_Then_LateZ | 0 | 0
|
||||
* 4 | true | true | n/a | EarlyZ_Then_LateZ | 0 | 1
|
||||
*
|
||||
* In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register.
|
||||
* In case 2, NOOP_CULL is a don't care field. In case 2, 3 and 4, ReZ doesn't make sense.
|
||||
*
|
||||
* Don't use ReZ without profiling !!!
|
||||
*
|
||||
* ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex
|
||||
* shaders.
|
||||
*/
|
||||
if (sel->info.base.fs.early_fragment_tests) {
|
||||
/* Cases 3, 4. */
|
||||
sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) |
|
||||
S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
|
||||
S_02880C_EXEC_ON_NOOP(sel->info.base.writes_memory);
|
||||
} else if (sel->info.base.writes_memory) {
|
||||
/* Case 2. */
|
||||
sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) | S_02880C_EXEC_ON_HIER_FAIL(1);
|
||||
} else {
|
||||
/* Case 1. */
|
||||
sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
|
||||
}
|
||||
|
||||
if (sel->info.base.fs.post_depth_coverage)
|
||||
sel->db_shader_control |= S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(1);
|
||||
}
|
||||
|
||||
(void)simple_mtx_init(&sel->mutex, mtx_plain);
|
||||
|
||||
si_schedule_initial_compile(sctx, sel->info.stage, &sel->ready, &sel->compiler_ctx_state,
|
||||
@@ -3315,8 +3164,8 @@ static void si_update_clip_regs(struct si_context *sctx, struct si_shader_select
|
||||
(!old_hw_vs ||
|
||||
(old_hw_vs->info.stage == MESA_SHADER_VERTEX && old_hw_vs->info.base.vs.window_space_position) !=
|
||||
(next_hw_vs->info.stage == MESA_SHADER_VERTEX && next_hw_vs->info.base.vs.window_space_position) ||
|
||||
old_hw_vs->clipdist_mask != next_hw_vs->clipdist_mask ||
|
||||
old_hw_vs->culldist_mask != next_hw_vs->culldist_mask || !old_hw_vs_variant ||
|
||||
old_hw_vs->info.clipdist_mask != next_hw_vs->info.clipdist_mask ||
|
||||
old_hw_vs->info.culldist_mask != next_hw_vs->info.culldist_mask || !old_hw_vs_variant ||
|
||||
!next_hw_vs_variant ||
|
||||
old_hw_vs_variant->pa_cl_vs_out_cntl != next_hw_vs_variant->pa_cl_vs_out_cntl))
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
|
||||
@@ -3383,7 +3232,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
|
||||
sctx->shader.vs.current = sel ? sel->first_variant : NULL;
|
||||
sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0;
|
||||
sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false;
|
||||
sctx->fixed_func_tcs_shader.key.ge.mono.u.ff_tcs_inputs_to_copy = sel ? sel->outputs_written : 0;
|
||||
sctx->fixed_func_tcs_shader.key.ge.mono.u.ff_tcs_inputs_to_copy = sel ? sel->info.outputs_written : 0;
|
||||
|
||||
if (si_update_ngg(sctx))
|
||||
si_shader_change_notify(sctx);
|
||||
@@ -3556,7 +3405,7 @@ void si_update_ps_kill_enable(struct si_context *sctx)
|
||||
if (!sctx->shader.ps.cso)
|
||||
return;
|
||||
|
||||
unsigned db_shader_control = sctx->shader.ps.cso->db_shader_control |
|
||||
unsigned db_shader_control = sctx->shader.ps.cso->info.db_shader_control |
|
||||
S_02880C_KILL_ENABLE(sctx->queued.named.dsa->alpha_func != PIPE_FUNC_ALWAYS);
|
||||
|
||||
if (sctx->ps_db_shader_control != db_shader_control) {
|
||||
@@ -3801,12 +3650,12 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
|
||||
unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
|
||||
|
||||
/* Calculate the minimum size. */
|
||||
unsigned min_esgs_ring_size = align(es->esgs_itemsize * gs_vertex_reuse * wave_size, alignment);
|
||||
unsigned min_esgs_ring_size = align(es->info.esgs_itemsize * gs_vertex_reuse * wave_size, alignment);
|
||||
|
||||
/* These are recommended sizes, not minimum sizes. */
|
||||
unsigned esgs_ring_size =
|
||||
max_gs_waves * 2 * wave_size * es->esgs_itemsize * gs->gs_input_verts_per_prim;
|
||||
unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs->max_gsvs_emit_size;
|
||||
max_gs_waves * 2 * wave_size * es->info.esgs_itemsize * gs->info.gs_input_verts_per_prim;
|
||||
unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs->info.max_gsvs_emit_size;
|
||||
|
||||
min_esgs_ring_size = align(min_esgs_ring_size, alignment);
|
||||
esgs_ring_size = align(esgs_ring_size, alignment);
|
||||
|
Reference in New Issue
Block a user