radv: New shader args for NGG culling settings and viewport.
Add new shader arguments in RADV for:

- NGG culling settings
- Viewport transform

These will be used by NGG culling shaders.

Additionally, some tweaks are made to some config registers in order to make culling shaders more efficient.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10525>
This commit is contained in:
@@ -1327,6 +1327,19 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
|
||||
|
||||
radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
|
||||
|
||||
if (pipeline->graphics.has_ngg_culling &&
|
||||
pipeline->graphics.last_vgt_api_stage != MESA_SHADER_GEOMETRY &&
|
||||
!cmd_buffer->state.last_nggc_settings) {
|
||||
/* The already emitted RSRC2 contains the LDS required for NGG culling.
|
||||
* Culling is currently disabled, so re-emit RSRC2 to reduce LDS usage.
|
||||
* API GS always needs LDS, so this isn't useful there.
|
||||
*/
|
||||
struct radv_shader_variant *v = pipeline->shaders[pipeline->graphics.last_vgt_api_stage];
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
|
||||
(v->config.rsrc2 & C_00B22C_LDS_SIZE) |
|
||||
S_00B22C_LDS_SIZE(v->info.num_lds_blocks_when_not_culling));
|
||||
}
|
||||
|
||||
if (!cmd_buffer->state.emitted_pipeline ||
|
||||
cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw ||
|
||||
cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash ||
|
||||
@@ -3839,6 +3852,8 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
|
||||
cmd_buffer->state.last_sx_ps_downconvert = -1;
|
||||
cmd_buffer->state.last_sx_blend_opt_epsilon = -1;
|
||||
cmd_buffer->state.last_sx_blend_opt_control = -1;
|
||||
cmd_buffer->state.last_nggc_settings = -1;
|
||||
cmd_buffer->state.last_nggc_settings_sgpr_idx = -1;
|
||||
cmd_buffer->usage_flags = pBeginInfo->flags;
|
||||
|
||||
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
|
||||
@@ -4961,6 +4976,10 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
|
||||
if (secondary->state.last_index_type != -1) {
|
||||
primary->state.last_index_type = secondary->state.last_index_type;
|
||||
}
|
||||
|
||||
primary->state.last_nggc_settings = secondary->state.last_nggc_settings;
|
||||
primary->state.last_nggc_settings_sgpr_idx = secondary->state.last_nggc_settings_sgpr_idx;
|
||||
primary->state.last_nggc_skip = secondary->state.last_nggc_skip;
|
||||
}
|
||||
|
||||
/* After executing commands from secondary buffers we have to dirty
|
||||
@@ -5635,6 +5654,209 @@ radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Bit flags that make up the low bits of the NGG culling settings word. */
enum {
   /* No culling requested. */
   ngg_cull_none = 0,
   /* Cull front-facing primitives. */
   ngg_cull_front_face = 1 << 0,
   /* Cull back-facing primitives. */
   ngg_cull_back_face = 1 << 1,
   /* Front face winding is counter-clockwise. */
   ngg_cull_face_is_ccw = 1 << 2,
   /* Enable small primitive culling. */
   ngg_cull_small_primitives = 1 << 3,
};
|
||||
|
||||
/* Decide whether NGG culling should be skipped (i.e. disabled) for a draw.
 *
 * has_tess:      true when the caller's last pre-rasterization stage is
 *                tessellation evaluation.
 * vtx_cnt:       vertex count of the draw (only meaningful for direct draws).
 * indirect:      true when the draw is indirect, so the count is unknown here.
 * num_viewports: number of viewports currently set.
 *
 * Returns true when culling should be turned off for this draw.
 */
ALWAYS_INLINE static bool
radv_skip_ngg_culling(bool has_tess, const unsigned vtx_cnt,
                      bool indirect, unsigned num_viewports)
{
   /* The culling state emitted by radv_emit_ngg_culling_state only carries the
    * transform of viewport 0, so culling against it would be wrong as soon as
    * more than one viewport is in use. Skipping is always safe because culling
    * is purely an optimization.
    *
    * TODO: Figure out how to do culling with multiple viewports efficiently.
    */
   if (num_viewports > 1)
      return true;

   /* If we have to draw only a few vertices, we get better latency if
    * we disable NGG culling.
    *
    * When tessellation is used, what matters is the number of tessellated
    * vertices, so let's always assume it's not a small draw. The same goes
    * for indirect draws, whose vertex count is not known on the CPU.
    */
   return !has_tess && !indirect && vtx_cnt < 512;
}
|
||||
|
||||
ALWAYS_INLINE static uint32_t
|
||||
radv_get_ngg_culling_settings(struct radv_cmd_buffer *cmd_buffer, bool vp_y_inverted)
|
||||
{
|
||||
const struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
|
||||
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
|
||||
|
||||
/* Cull every triangle when rasterizer discard is enabled. */
|
||||
if (d->rasterizer_discard_enable ||
|
||||
G_028810_DX_RASTERIZATION_KILL(cmd_buffer->state.pipeline->graphics.pa_cl_clip_cntl))
|
||||
return ngg_cull_front_face | ngg_cull_back_face;
|
||||
|
||||
uint32_t pa_su_sc_mode_cntl = cmd_buffer->state.pipeline->graphics.pa_su_sc_mode_cntl;
|
||||
uint32_t nggc_settings = ngg_cull_none;
|
||||
|
||||
/* The culling code needs to know whether face is CW or CCW. */
|
||||
bool ccw = (pipeline->graphics.needed_dynamic_state & RADV_DYNAMIC_FRONT_FACE)
|
||||
? d->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE
|
||||
: G_028814_FACE(pa_su_sc_mode_cntl) == 0;
|
||||
|
||||
/* Take inverted viewport into account. */
|
||||
ccw ^= vp_y_inverted;
|
||||
|
||||
if (ccw)
|
||||
nggc_settings |= ngg_cull_face_is_ccw;
|
||||
|
||||
/* Face culling settings. */
|
||||
if ((pipeline->graphics.needed_dynamic_state & RADV_DYNAMIC_CULL_MODE)
|
||||
? (d->cull_mode & VK_CULL_MODE_FRONT_BIT)
|
||||
: G_028814_CULL_FRONT(pa_su_sc_mode_cntl))
|
||||
nggc_settings |= ngg_cull_front_face;
|
||||
if ((pipeline->graphics.needed_dynamic_state & RADV_DYNAMIC_CULL_MODE)
|
||||
? (d->cull_mode & VK_CULL_MODE_BACK_BIT)
|
||||
: G_028814_CULL_BACK(pa_su_sc_mode_cntl))
|
||||
nggc_settings |= ngg_cull_back_face;
|
||||
|
||||
/* Small primitive culling is only valid when conservative overestimation is not used. */
|
||||
if (!pipeline->graphics.uses_conservative_overestimate) {
|
||||
nggc_settings |= ngg_cull_small_primitives;
|
||||
|
||||
/* small_prim_precision = num_samples / 2^subpixel_bits
|
||||
* num_samples is also always a power of two, so the small prim precision can only be
|
||||
* a power of two between 2^-2 and 2^-6, therefore it's enough to remember the exponent.
|
||||
*/
|
||||
unsigned subpixel_bits = 256;
|
||||
int32_t small_prim_precision_log2 = util_logbase2(pipeline->graphics.ms.num_samples) - util_logbase2(subpixel_bits);
|
||||
nggc_settings |= ((uint32_t) small_prim_precision_log2 << 24u);
|
||||
}
|
||||
|
||||
return nggc_settings;
|
||||
}
|
||||
|
||||
static void
|
||||
radv_emit_ngg_culling_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info)
|
||||
{
|
||||
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
|
||||
const unsigned stage = pipeline->graphics.last_vgt_api_stage;
|
||||
const bool nggc_supported = pipeline->graphics.has_ngg_culling;
|
||||
|
||||
if (!nggc_supported && !cmd_buffer->state.last_nggc_settings) {
|
||||
/* Current shader doesn't support culling and culling was already disabled:
|
||||
* No further steps needed, just remember the SGPR's location is not set.
|
||||
*/
|
||||
cmd_buffer->state.last_nggc_settings_sgpr_idx = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check dirty flags:
|
||||
* - Dirty pipeline: SGPR index may have changed (we have to re-emit if changed).
|
||||
* - Dirty dynamic flags: culling settings may have changed.
|
||||
*/
|
||||
const bool dirty =
|
||||
cmd_buffer->state.dirty &
|
||||
(RADV_CMD_DIRTY_PIPELINE |
|
||||
RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
|
||||
RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT);
|
||||
|
||||
/* Check small draw status:
|
||||
* For small draw calls, we disable culling by setting the SGPR to 0.
|
||||
*/
|
||||
const bool skip =
|
||||
radv_skip_ngg_culling(
|
||||
stage == MESA_SHADER_TESS_EVAL, draw_info->count, draw_info->indirect,
|
||||
cmd_buffer->state.dynamic.viewport.count);
|
||||
|
||||
/* See if anything changed. */
|
||||
if (!dirty && skip == cmd_buffer->state.last_nggc_skip)
|
||||
return;
|
||||
|
||||
/* Remember small draw state. */
|
||||
cmd_buffer->state.last_nggc_skip = skip;
|
||||
const struct radv_shader_variant *v = pipeline->shaders[stage];
|
||||
assert(v->info.has_ngg_culling == nggc_supported);
|
||||
|
||||
/* Find the user SGPR. */
|
||||
const uint32_t base_reg = pipeline->user_data_0[stage];
|
||||
const int8_t nggc_sgpr_idx = v->info.user_sgprs_locs.shader_data[AC_UD_NGG_CULLING_SETTINGS].sgpr_idx;
|
||||
assert(!nggc_supported || nggc_sgpr_idx != -1);
|
||||
|
||||
/* Get viewport transform. */
|
||||
float vp_scale[3], vp_translate[3];
|
||||
radv_get_viewport_xform(&cmd_buffer->state.dynamic.viewport.viewports[0], vp_scale, vp_translate);
|
||||
bool vp_y_inverted = (-vp_scale[1] + vp_translate[1]) > (vp_scale[1] + vp_translate[1]);
|
||||
|
||||
/* Get current culling settings. */
|
||||
uint32_t nggc_settings = nggc_supported && !skip
|
||||
? radv_get_ngg_culling_settings(cmd_buffer, vp_y_inverted)
|
||||
: ngg_cull_none;
|
||||
|
||||
bool emit_viewport = nggc_settings &&
|
||||
(cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_VIEWPORT ||
|
||||
cmd_buffer->state.last_nggc_settings_sgpr_idx != nggc_sgpr_idx ||
|
||||
!cmd_buffer->state.last_nggc_settings);
|
||||
|
||||
if (emit_viewport) {
|
||||
/* Correction for inverted Y */
|
||||
if (vp_y_inverted) {
|
||||
vp_scale[1] = -vp_scale[1];
|
||||
vp_translate[1] = -vp_translate[1];
|
||||
}
|
||||
|
||||
/* Correction for number of samples per pixel. */
|
||||
for (unsigned i = 0; i < 2; ++i) {
|
||||
vp_scale[i] *= (float) pipeline->graphics.ms.num_samples;
|
||||
vp_translate[i] *= (float) pipeline->graphics.ms.num_samples;
|
||||
}
|
||||
|
||||
uint32_t vp_reg_values[4] = {fui(vp_scale[0]), fui(vp_scale[1]), fui(vp_translate[0]), fui(vp_translate[1])};
|
||||
const int8_t vp_sgpr_idx = v->info.user_sgprs_locs.shader_data[AC_UD_NGG_VIEWPORT].sgpr_idx;
|
||||
assert(vp_sgpr_idx != -1);
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + vp_sgpr_idx * 4, 4);
|
||||
radeon_emit_array(cmd_buffer->cs, vp_reg_values, 4);
|
||||
}
|
||||
|
||||
bool emit_settings = nggc_supported &&
|
||||
(cmd_buffer->state.last_nggc_settings != nggc_settings ||
|
||||
cmd_buffer->state.last_nggc_settings_sgpr_idx != nggc_sgpr_idx);
|
||||
|
||||
/* This needs to be emitted when culling is turned on
|
||||
* and when it's already on but some settings change.
|
||||
*/
|
||||
if (emit_settings) {
|
||||
assert(nggc_sgpr_idx >= 0);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, base_reg + nggc_sgpr_idx * 4, nggc_settings);
|
||||
}
|
||||
|
||||
/* These only need to be emitted when culling is turned on or off,
|
||||
* but not when it stays on and just some settings change.
|
||||
*/
|
||||
if (!!cmd_buffer->state.last_nggc_settings != !!nggc_settings) {
|
||||
const struct radv_physical_device *physical_device = cmd_buffer->device->physical_device;
|
||||
uint32_t rsrc2 = v->config.rsrc2;
|
||||
uint32_t oversub_pc_lines = physical_device->rad_info.pc_lines / 4;
|
||||
|
||||
if (nggc_settings) {
|
||||
/* Tweak the parameter cache oversubscription.
|
||||
* This allows the HW to launch more NGG workgroups than the pre-allocated parameter
|
||||
* cache would normally allow, yielding better perf when culling is on.
|
||||
*/
|
||||
oversub_pc_lines = physical_device->rad_info.pc_lines * 3 / 4;
|
||||
} else {
|
||||
/* Allocate less LDS when culling is disabled. (But GS always needs it.) */
|
||||
if (stage != MESA_SHADER_GEOMETRY)
|
||||
rsrc2 = (rsrc2 & C_00B22C_LDS_SIZE) | S_00B22C_LDS_SIZE(v->info.num_lds_blocks_when_not_culling);
|
||||
}
|
||||
|
||||
/* When the pipeline is dirty, radv_emit_graphics_pipeline will write this register. */
|
||||
if (!(cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)) {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2);
|
||||
}
|
||||
|
||||
/* Update parameter cache oversubscription setting. */
|
||||
radeon_set_uconfig_reg(cmd_buffer->cs, R_030980_GE_PC_ALLOC,
|
||||
S_030980_OVERSUB_EN(physical_device->rad_info.use_late_alloc) |
|
||||
S_030980_NUM_PC_LINES(oversub_pc_lines - 1));
|
||||
}
|
||||
|
||||
cmd_buffer->state.last_nggc_settings = nggc_settings;
|
||||
cmd_buffer->state.last_nggc_settings_sgpr_idx = nggc_sgpr_idx;
|
||||
}
|
||||
|
||||
static void
|
||||
radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info)
|
||||
{
|
||||
@@ -5644,6 +5866,10 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
|
||||
cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
|
||||
radv_emit_rbplus_state(cmd_buffer);
|
||||
|
||||
if ((cmd_buffer->device->instance->perftest_flags & RADV_PERFTEST_NGGC) &&
|
||||
cmd_buffer->state.pipeline->graphics.is_ngg)
|
||||
radv_emit_ngg_culling_state(cmd_buffer, info);
|
||||
|
||||
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
|
||||
radv_emit_graphics_pipeline(cmd_buffer);
|
||||
|
||||
|
@@ -1790,6 +1790,10 @@ radv_pipeline_init_raster_state(struct radv_pipeline *pipeline,
|
||||
S_028810_ZCLIP_FAR_DISABLE(depth_clip_disable ? 1 : 0) |
|
||||
S_028810_DX_RASTERIZATION_KILL(raster_info->rasterizerDiscardEnable ? 1 : 0) |
|
||||
S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
|
||||
|
||||
pipeline->graphics.uses_conservative_overestimate =
|
||||
radv_get_conservative_raster_mode(pCreateInfo->pRasterizationState) ==
|
||||
VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -5441,6 +5445,9 @@ radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device,
|
||||
pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline);
|
||||
|
||||
pipeline->graphics.is_ngg = radv_pipeline_has_ngg(pipeline);
|
||||
pipeline->graphics.has_ngg_culling =
|
||||
pipeline->graphics.is_ngg &&
|
||||
pipeline->shaders[pipeline->graphics.last_vgt_api_stage]->info.has_ngg_culling;
|
||||
|
||||
radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend);
|
||||
|
||||
|
@@ -1415,6 +1415,11 @@ struct radv_cmd_state {
|
||||
bool pending_sqtt_barrier_end;
|
||||
enum rgp_flush_bits sqtt_flush_bits;
|
||||
|
||||
/* NGG culling state. */
|
||||
uint32_t last_nggc_settings;
|
||||
int8_t last_nggc_settings_sgpr_idx;
|
||||
bool last_nggc_skip;
|
||||
|
||||
uint8_t cb_mip[MAX_RTS];
|
||||
|
||||
/* Whether DRAW_{INDEX}_INDIRECT_MULTI is emitted. */
|
||||
@@ -1762,6 +1767,7 @@ struct radv_pipeline {
|
||||
unsigned pa_cl_clip_cntl;
|
||||
unsigned cb_color_control;
|
||||
bool uses_dynamic_stride;
|
||||
bool uses_conservative_overestimate;
|
||||
|
||||
/* Used for rbplus */
|
||||
uint32_t col_format;
|
||||
@@ -1769,6 +1775,7 @@ struct radv_pipeline {
|
||||
|
||||
/* Whether the pipeline uses NGG (GFX10+). */
|
||||
bool is_ngg;
|
||||
bool has_ngg_culling;
|
||||
|
||||
/* Last pre-PS API stage */
|
||||
gl_shader_stage last_vgt_api_stage;
|
||||
|
@@ -969,6 +969,8 @@ void radv_lower_ngg(struct radv_device *device, struct nir_shader *nir,
|
||||
key->vs_common_out.export_prim_id,
|
||||
key->vs.provoking_vtx_last);
|
||||
|
||||
info->has_ngg_culling = out_conf.can_cull;
|
||||
info->num_lds_blocks_when_not_culling = DIV_ROUND_UP(out_conf.lds_bytes_if_culling_off, device->physical_device->rad_info.lds_encode_granularity);
|
||||
info->is_ngg_passthrough = out_conf.passthrough;
|
||||
key->vs_common_out.as_ngg_passthrough = out_conf.passthrough;
|
||||
} else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
|
@@ -162,7 +162,9 @@ enum radv_ud_index {
|
||||
AC_UD_VIEW_INDEX = 4,
|
||||
AC_UD_STREAMOUT_BUFFERS = 5,
|
||||
AC_UD_NGG_GS_STATE = 6,
|
||||
AC_UD_SHADER_START = 7,
|
||||
AC_UD_NGG_CULLING_SETTINGS = 7,
|
||||
AC_UD_NGG_VIEWPORT = 8,
|
||||
AC_UD_SHADER_START = 9,
|
||||
AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
|
||||
AC_UD_VS_BASE_VERTEX_START_INSTANCE,
|
||||
AC_UD_VS_MAX_UD,
|
||||
@@ -261,6 +263,8 @@ struct radv_shader_info {
|
||||
bool need_indirect_descriptor_sets;
|
||||
bool is_ngg;
|
||||
bool is_ngg_passthrough;
|
||||
bool has_ngg_culling;
|
||||
uint32_t num_lds_blocks_when_not_culling;
|
||||
uint32_t num_tess_patches;
|
||||
struct {
|
||||
uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX];
|
||||
|
@@ -117,6 +117,19 @@ count_vs_user_sgprs(struct radv_shader_args *args)
|
||||
return count;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
count_ngg_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
|
||||
{
|
||||
unsigned count = 0;
|
||||
|
||||
if (stage == MESA_SHADER_GEOMETRY)
|
||||
count += 1; /* ngg_gs_state */
|
||||
if (args->shader_info->has_ngg_culling)
|
||||
count += 5; /* ngg_culling_settings + 4x ngg_viewport_* */
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static void
|
||||
allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
|
||||
{
|
||||
@@ -184,6 +197,8 @@ allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool h
|
||||
case MESA_SHADER_VERTEX:
|
||||
if (!args->is_gs_copy_shader)
|
||||
user_sgpr_count += count_vs_user_sgprs(args);
|
||||
if (args->options->key.vs_common_out.as_ngg)
|
||||
user_sgpr_count += count_ngg_sgprs(args, stage);
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
if (has_previous_stage) {
|
||||
@@ -192,11 +207,13 @@ allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool h
|
||||
}
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
if (args->options->key.vs_common_out.as_ngg)
|
||||
user_sgpr_count += count_ngg_sgprs(args, stage);
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
if (has_previous_stage) {
|
||||
if (args->options->key.vs_common_out.as_ngg)
|
||||
user_sgpr_count++; /* NGG GS state */
|
||||
user_sgpr_count += count_ngg_sgprs(args, stage);
|
||||
|
||||
if (previous_stage == MESA_SHADER_VERTEX) {
|
||||
user_sgpr_count += count_vs_user_sgprs(args);
|
||||
@@ -356,6 +373,22 @@ declare_tes_input_vgprs(struct radv_shader_args *args)
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
|
||||
}
|
||||
|
||||
static void
|
||||
declare_ngg_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
|
||||
{
|
||||
if (stage == MESA_SHADER_GEOMETRY) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_gs_state);
|
||||
}
|
||||
|
||||
if (args->shader_info->has_ngg_culling) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_culling_settings);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[0]);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[1]);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[0]);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[1]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
|
||||
uint8_t *user_sgpr_idx)
|
||||
@@ -405,6 +438,24 @@ set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
set_ngg_sgprs_locs(struct radv_shader_args *args, gl_shader_stage stage, uint8_t *user_sgpr_idx)
|
||||
{
|
||||
if (stage == MESA_SHADER_GEOMETRY) {
|
||||
assert(args->ngg_gs_state.used);
|
||||
set_loc_shader(args, AC_UD_NGG_GS_STATE, user_sgpr_idx, 1);
|
||||
}
|
||||
|
||||
if (args->shader_info->has_ngg_culling) {
|
||||
assert(args->ngg_culling_settings.used &&
|
||||
args->ngg_viewport_scale[0].used && args->ngg_viewport_scale[1].used &&
|
||||
args->ngg_viewport_translate[0].used && args->ngg_viewport_translate[1].used);
|
||||
|
||||
set_loc_shader(args, AC_UD_NGG_CULLING_SETTINGS, user_sgpr_idx, 1);
|
||||
set_loc_shader(args, AC_UD_NGG_VIEWPORT, user_sgpr_idx, 4);
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns whether the stage is a stage that can be directly before the GS */
|
||||
static bool
|
||||
is_pre_gs_stage(gl_shader_stage stage)
|
||||
@@ -488,6 +539,9 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
|
||||
if (args->options->explicit_scratch_args) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
||||
}
|
||||
if (args->options->key.vs_common_out.as_ngg) {
|
||||
declare_ngg_sgprs(args, stage);
|
||||
}
|
||||
|
||||
declare_vs_input_vgprs(args);
|
||||
break;
|
||||
@@ -547,6 +601,9 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
|
||||
if (args->options->explicit_scratch_args) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
||||
}
|
||||
if (args->options->key.vs_common_out.as_ngg) {
|
||||
declare_ngg_sgprs(args, stage);
|
||||
}
|
||||
declare_tes_input_vgprs(args);
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
@@ -576,7 +633,7 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
|
||||
}
|
||||
|
||||
if (args->options->key.vs_common_out.as_ngg) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_gs_state);
|
||||
declare_ngg_sgprs(args, stage);
|
||||
}
|
||||
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
|
||||
@@ -669,6 +726,8 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
|
||||
set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
|
||||
if (args->ac.view_index.used)
|
||||
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
|
||||
if (args->options->key.vs_common_out.as_ngg)
|
||||
set_ngg_sgprs_locs(args, stage, &user_sgpr_idx);
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
|
||||
@@ -678,6 +737,8 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
if (args->ac.view_index.used)
|
||||
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
|
||||
if (args->options->key.vs_common_out.as_ngg)
|
||||
set_ngg_sgprs_locs(args, stage, &user_sgpr_idx);
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
if (has_previous_stage) {
|
||||
@@ -688,8 +749,8 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
|
||||
if (args->ac.view_index.used)
|
||||
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
|
||||
|
||||
if (args->ngg_gs_state.used)
|
||||
set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
|
||||
if (args->options->key.vs_common_out.as_ngg)
|
||||
set_ngg_sgprs_locs(args, stage, &user_sgpr_idx);
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
break;
|
||||
|
@@ -41,6 +41,9 @@ struct radv_shader_args {
|
||||
|
||||
/* NGG GS */
|
||||
struct ac_arg ngg_gs_state;
|
||||
struct ac_arg ngg_culling_settings;
|
||||
struct ac_arg ngg_viewport_scale[2];
|
||||
struct ac_arg ngg_viewport_translate[2];
|
||||
|
||||
bool is_gs_copy_shader;
|
||||
bool is_trap_handler_shader;
|
||||
|
Reference in New Issue
Block a user