radv: New shader args for NGG culling settings and viewport.

Add new shader arguments in RADV for:
- NGG culling settings
- Viewport transform

These will be used by NGG culling shaders.

Additionally, a few config registers are tweaked
to make culling shaders more efficient.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10525>
This commit is contained in:
Timur Kristóf
2021-06-07 23:23:38 +02:00
committed by Marge Bot
parent ed163a44b6
commit 9a95f5487f
7 changed files with 315 additions and 5 deletions

View File

@@ -1327,6 +1327,19 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
if (pipeline->graphics.has_ngg_culling &&
pipeline->graphics.last_vgt_api_stage != MESA_SHADER_GEOMETRY &&
!cmd_buffer->state.last_nggc_settings) {
/* The already emitted RSRC2 contains the LDS required for NGG culling.
* Culling is currently disabled, so re-emit RSRC2 to reduce LDS usage.
* API GS always needs LDS, so this isn't useful there.
*/
struct radv_shader_variant *v = pipeline->shaders[pipeline->graphics.last_vgt_api_stage];
radeon_set_sh_reg(cmd_buffer->cs, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
(v->config.rsrc2 & C_00B22C_LDS_SIZE) |
S_00B22C_LDS_SIZE(v->info.num_lds_blocks_when_not_culling));
}
if (!cmd_buffer->state.emitted_pipeline ||
cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw ||
cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash ||
@@ -3839,6 +3852,8 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
cmd_buffer->state.last_sx_ps_downconvert = -1;
cmd_buffer->state.last_sx_blend_opt_epsilon = -1;
cmd_buffer->state.last_sx_blend_opt_control = -1;
cmd_buffer->state.last_nggc_settings = -1;
cmd_buffer->state.last_nggc_settings_sgpr_idx = -1;
cmd_buffer->usage_flags = pBeginInfo->flags;
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
@@ -4961,6 +4976,10 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
if (secondary->state.last_index_type != -1) {
primary->state.last_index_type = secondary->state.last_index_type;
}
primary->state.last_nggc_settings = secondary->state.last_nggc_settings;
primary->state.last_nggc_settings_sgpr_idx = secondary->state.last_nggc_settings_sgpr_idx;
primary->state.last_nggc_skip = secondary->state.last_nggc_skip;
}
/* After executing commands from secondary buffers we have to dirty
@@ -5635,6 +5654,209 @@ radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
return false;
}
/* Flags passed to the NGG culling shader through the culling-settings user SGPR.
 * Bits 0-3 select the culling behavior; bits 24+ of the same SGPR carry the
 * small-primitive precision exponent (see radv_get_ngg_culling_settings).
 */
enum {
   ngg_cull_none = 0,
   ngg_cull_front_face = 1,
   ngg_cull_back_face = 2,
   ngg_cull_face_is_ccw = 4,         /* front face winding is CCW after viewport flip */
   ngg_cull_small_primitives = 8,    /* enable small-primitive culling */
};
/* Decide whether NGG culling should be bypassed for this draw.
 *
 * Returns true only for small (< 512 vertices), direct, non-tessellated
 * draws rendered to a single viewport — such draws get better latency
 * with culling disabled.
 *
 * When tessellation is used, what matters is the number of tessellated
 * vertices, so tessellated draws are never treated as small.
 *
 * TODO: Figure out how to do culling with multiple viewports efficiently.
 */
ALWAYS_INLINE static bool
radv_skip_ngg_culling(bool has_tess, const unsigned vtx_cnt,
                      bool indirect, unsigned num_viewports)
{
   if (has_tess || indirect)
      return false;

   return num_viewports == 1 && vtx_cnt < 512;
}
/* Compute the value of the NGG culling settings user SGPR for the current
 * pipeline and dynamic state.
 *
 * Returns a bitmask of ngg_cull_* flags; when small-primitive culling is
 * enabled, bits 24+ additionally carry the small-primitive precision
 * exponent. vp_y_inverted indicates the viewport Y axis is flipped, which
 * inverts the effective CW/CCW winding.
 */
ALWAYS_INLINE static uint32_t
radv_get_ngg_culling_settings(struct radv_cmd_buffer *cmd_buffer, bool vp_y_inverted)
{
   const struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
   const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;

   /* Cull every triangle when rasterizer discard is enabled. */
   if (d->rasterizer_discard_enable ||
       G_028810_DX_RASTERIZATION_KILL(cmd_buffer->state.pipeline->graphics.pa_cl_clip_cntl))
      return ngg_cull_front_face | ngg_cull_back_face;

   uint32_t pa_su_sc_mode_cntl = cmd_buffer->state.pipeline->graphics.pa_su_sc_mode_cntl;
   uint32_t nggc_settings = ngg_cull_none;

   /* The culling code needs to know whether face is CW or CCW.
    * Use the dynamic state when the pipeline declares front face as dynamic,
    * otherwise read it from the baked PA_SU_SC_MODE_CNTL value.
    */
   bool ccw = (pipeline->graphics.needed_dynamic_state & RADV_DYNAMIC_FRONT_FACE)
              ? d->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE
              : G_028814_FACE(pa_su_sc_mode_cntl) == 0;

   /* Take inverted viewport into account. */
   ccw ^= vp_y_inverted;

   if (ccw)
      nggc_settings |= ngg_cull_face_is_ccw;

   /* Face culling settings (dynamic cull mode takes precedence when enabled). */
   if ((pipeline->graphics.needed_dynamic_state & RADV_DYNAMIC_CULL_MODE)
       ? (d->cull_mode & VK_CULL_MODE_FRONT_BIT)
       : G_028814_CULL_FRONT(pa_su_sc_mode_cntl))
      nggc_settings |= ngg_cull_front_face;
   if ((pipeline->graphics.needed_dynamic_state & RADV_DYNAMIC_CULL_MODE)
       ? (d->cull_mode & VK_CULL_MODE_BACK_BIT)
       : G_028814_CULL_BACK(pa_su_sc_mode_cntl))
      nggc_settings |= ngg_cull_back_face;

   /* Small primitive culling is only valid when conservative overestimation is not used. */
   if (!pipeline->graphics.uses_conservative_overestimate) {
      nggc_settings |= ngg_cull_small_primitives;

      /* small_prim_precision = num_samples / 2^subpixel_bits
       * num_samples is also always a power of two, so the small prim precision can only be
       * a power of two between 2^-2 and 2^-6, therefore it's enough to remember the exponent.
       */
      /* NOTE(review): despite the name, this holds 2^subpixel_bits (256 = 8
       * subpixel bits), not a bit count — util_logbase2 below recovers the 8. */
      unsigned subpixel_bits = 256;
      int32_t small_prim_precision_log2 = util_logbase2(pipeline->graphics.ms.num_samples) - util_logbase2(subpixel_bits);
      /* Pack the (signed) exponent into the high bits of the SGPR. */
      nggc_settings |= ((uint32_t) small_prim_precision_log2 << 24u);
   }

   return nggc_settings;
}
/* Emit the NGG culling state for the current draw: the culling-settings user
 * SGPR, the viewport transform user SGPRs, and the related RSRC2 / GE_PC_ALLOC
 * registers. All emissions are skipped when nothing relevant changed since the
 * last draw (tracked in cmd_buffer->state.last_nggc_*).
 */
static void
radv_emit_ngg_culling_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info)
{
   struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
   const unsigned stage = pipeline->graphics.last_vgt_api_stage;
   const bool nggc_supported = pipeline->graphics.has_ngg_culling;

   if (!nggc_supported && !cmd_buffer->state.last_nggc_settings) {
      /* Current shader doesn't support culling and culling was already disabled:
       * No further steps needed, just remember the SGPR's location is not set.
       */
      cmd_buffer->state.last_nggc_settings_sgpr_idx = -1;
      return;
   }

   /* Check dirty flags:
    * - Dirty pipeline: SGPR index may have changed (we have to re-emit if changed).
    * - Dirty dynamic flags: culling settings may have changed.
    */
   const bool dirty =
      cmd_buffer->state.dirty &
      (RADV_CMD_DIRTY_PIPELINE |
       RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
       RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT);

   /* Check small draw status:
    * For small draw calls, we disable culling by setting the SGPR to 0.
    */
   const bool skip =
      radv_skip_ngg_culling(
         stage == MESA_SHADER_TESS_EVAL, draw_info->count, draw_info->indirect,
         cmd_buffer->state.dynamic.viewport.count);

   /* See if anything changed. */
   if (!dirty && skip == cmd_buffer->state.last_nggc_skip)
      return;

   /* Remember small draw state. */
   cmd_buffer->state.last_nggc_skip = skip;
   const struct radv_shader_variant *v = pipeline->shaders[stage];
   assert(v->info.has_ngg_culling == nggc_supported);

   /* Find the user SGPR. */
   const uint32_t base_reg = pipeline->user_data_0[stage];
   const int8_t nggc_sgpr_idx = v->info.user_sgprs_locs.shader_data[AC_UD_NGG_CULLING_SETTINGS].sgpr_idx;
   assert(!nggc_supported || nggc_sgpr_idx != -1);

   /* Get viewport transform. */
   float vp_scale[3], vp_translate[3];
   radv_get_viewport_xform(&cmd_buffer->state.dynamic.viewport.viewports[0], vp_scale, vp_translate);
   /* Y is inverted when the viewport's min Y edge maps above its max Y edge. */
   bool vp_y_inverted = (-vp_scale[1] + vp_translate[1]) > (vp_scale[1] + vp_translate[1]);

   /* Get current culling settings. */
   uint32_t nggc_settings = nggc_supported && !skip
                            ? radv_get_ngg_culling_settings(cmd_buffer, vp_y_inverted)
                            : ngg_cull_none;

   /* The viewport SGPRs only need re-emitting when culling is enabled and the
    * viewport changed, the SGPR moved, or culling was previously off. */
   bool emit_viewport = nggc_settings &&
                        (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_VIEWPORT ||
                         cmd_buffer->state.last_nggc_settings_sgpr_idx != nggc_sgpr_idx ||
                         !cmd_buffer->state.last_nggc_settings);

   if (emit_viewport) {
      /* Correction for inverted Y */
      if (vp_y_inverted) {
         vp_scale[1] = -vp_scale[1];
         vp_translate[1] = -vp_translate[1];
      }

      /* Correction for number of samples per pixel. */
      for (unsigned i = 0; i < 2; ++i) {
         vp_scale[i] *= (float) pipeline->graphics.ms.num_samples;
         vp_translate[i] *= (float) pipeline->graphics.ms.num_samples;
      }

      uint32_t vp_reg_values[4] = {fui(vp_scale[0]), fui(vp_scale[1]), fui(vp_translate[0]), fui(vp_translate[1])};
      const int8_t vp_sgpr_idx = v->info.user_sgprs_locs.shader_data[AC_UD_NGG_VIEWPORT].sgpr_idx;
      assert(vp_sgpr_idx != -1);
      radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + vp_sgpr_idx * 4, 4);
      radeon_emit_array(cmd_buffer->cs, vp_reg_values, 4);
   }

   bool emit_settings = nggc_supported &&
                        (cmd_buffer->state.last_nggc_settings != nggc_settings ||
                         cmd_buffer->state.last_nggc_settings_sgpr_idx != nggc_sgpr_idx);

   /* This needs to be emitted when culling is turned on
    * and when it's already on but some settings change.
    */
   if (emit_settings) {
      assert(nggc_sgpr_idx >= 0);
      radeon_set_sh_reg(cmd_buffer->cs, base_reg + nggc_sgpr_idx * 4, nggc_settings);
   }

   /* These only need to be emitted when culling is turned on or off,
    * but not when it stays on and just some settings change.
    */
   if (!!cmd_buffer->state.last_nggc_settings != !!nggc_settings) {
      const struct radv_physical_device *physical_device = cmd_buffer->device->physical_device;
      uint32_t rsrc2 = v->config.rsrc2;
      uint32_t oversub_pc_lines = physical_device->rad_info.pc_lines / 4;

      if (nggc_settings) {
         /* Tweak the parameter cache oversubscription.
          * This allows the HW to launch more NGG workgroups than the pre-allocated parameter
          * cache would normally allow, yielding better perf when culling is on.
          */
         oversub_pc_lines = physical_device->rad_info.pc_lines * 3 / 4;
      } else {
         /* Allocate less LDS when culling is disabled. (But GS always needs it.) */
         if (stage != MESA_SHADER_GEOMETRY)
            rsrc2 = (rsrc2 & C_00B22C_LDS_SIZE) | S_00B22C_LDS_SIZE(v->info.num_lds_blocks_when_not_culling);
      }

      /* When the pipeline is dirty, radv_emit_graphics_pipeline will write this register. */
      if (!(cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)) {
         radeon_set_sh_reg(cmd_buffer->cs, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2);
      }

      /* Update parameter cache oversubscription setting. */
      radeon_set_uconfig_reg(cmd_buffer->cs, R_030980_GE_PC_ALLOC,
                             S_030980_OVERSUB_EN(physical_device->rad_info.use_late_alloc) |
                             S_030980_NUM_PC_LINES(oversub_pc_lines - 1));
   }

   /* Remember the emitted state so redundant emissions can be skipped. */
   cmd_buffer->state.last_nggc_settings = nggc_settings;
   cmd_buffer->state.last_nggc_settings_sgpr_idx = nggc_sgpr_idx;
}
static void
radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info)
{
@@ -5644,6 +5866,10 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
radv_emit_rbplus_state(cmd_buffer);
if ((cmd_buffer->device->instance->perftest_flags & RADV_PERFTEST_NGGC) &&
cmd_buffer->state.pipeline->graphics.is_ngg)
radv_emit_ngg_culling_state(cmd_buffer, info);
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
radv_emit_graphics_pipeline(cmd_buffer);

View File

@@ -1790,6 +1790,10 @@ radv_pipeline_init_raster_state(struct radv_pipeline *pipeline,
S_028810_ZCLIP_FAR_DISABLE(depth_clip_disable ? 1 : 0) |
S_028810_DX_RASTERIZATION_KILL(raster_info->rasterizerDiscardEnable ? 1 : 0) |
S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
pipeline->graphics.uses_conservative_overestimate =
radv_get_conservative_raster_mode(pCreateInfo->pRasterizationState) ==
VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT;
}
static void
@@ -5441,6 +5445,9 @@ radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device,
pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline);
pipeline->graphics.is_ngg = radv_pipeline_has_ngg(pipeline);
pipeline->graphics.has_ngg_culling =
pipeline->graphics.is_ngg &&
pipeline->shaders[pipeline->graphics.last_vgt_api_stage]->info.has_ngg_culling;
radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend);

View File

@@ -1415,6 +1415,11 @@ struct radv_cmd_state {
bool pending_sqtt_barrier_end;
enum rgp_flush_bits sqtt_flush_bits;
/* NGG culling state. */
uint32_t last_nggc_settings;
int8_t last_nggc_settings_sgpr_idx;
bool last_nggc_skip;
uint8_t cb_mip[MAX_RTS];
/* Whether DRAW_{INDEX}_INDIRECT_MULTI is emitted. */
@@ -1762,6 +1767,7 @@ struct radv_pipeline {
unsigned pa_cl_clip_cntl;
unsigned cb_color_control;
bool uses_dynamic_stride;
bool uses_conservative_overestimate;
/* Used for rbplus */
uint32_t col_format;
@@ -1769,6 +1775,7 @@ struct radv_pipeline {
/* Whether the pipeline uses NGG (GFX10+). */
bool is_ngg;
bool has_ngg_culling;
/* Last pre-PS API stage */
gl_shader_stage last_vgt_api_stage;

View File

@@ -969,6 +969,8 @@ void radv_lower_ngg(struct radv_device *device, struct nir_shader *nir,
key->vs_common_out.export_prim_id,
key->vs.provoking_vtx_last);
info->has_ngg_culling = out_conf.can_cull;
info->num_lds_blocks_when_not_culling = DIV_ROUND_UP(out_conf.lds_bytes_if_culling_off, device->physical_device->rad_info.lds_encode_granularity);
info->is_ngg_passthrough = out_conf.passthrough;
key->vs_common_out.as_ngg_passthrough = out_conf.passthrough;
} else if (nir->info.stage == MESA_SHADER_GEOMETRY) {

View File

@@ -162,7 +162,9 @@ enum radv_ud_index {
AC_UD_VIEW_INDEX = 4,
AC_UD_STREAMOUT_BUFFERS = 5,
AC_UD_NGG_GS_STATE = 6,
AC_UD_SHADER_START = 7,
AC_UD_NGG_CULLING_SETTINGS = 7,
AC_UD_NGG_VIEWPORT = 8,
AC_UD_SHADER_START = 9,
AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
AC_UD_VS_BASE_VERTEX_START_INSTANCE,
AC_UD_VS_MAX_UD,
@@ -261,6 +263,8 @@ struct radv_shader_info {
bool need_indirect_descriptor_sets;
bool is_ngg;
bool is_ngg_passthrough;
bool has_ngg_culling;
uint32_t num_lds_blocks_when_not_culling;
uint32_t num_tess_patches;
struct {
uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX];

View File

@@ -117,6 +117,19 @@ count_vs_user_sgprs(struct radv_shader_args *args)
return count;
}
/* Count the user SGPRs needed for NGG-specific shader state:
 * one for the GS state word (GS only), plus five for culling
 * (culling settings + 2x viewport scale + 2x viewport translate).
 */
static unsigned
count_ngg_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
{
   unsigned num_sgprs = (stage == MESA_SHADER_GEOMETRY) ? 1 : 0; /* ngg_gs_state */

   if (args->shader_info->has_ngg_culling)
      num_sgprs += 5; /* ngg_culling_settings + 4x ngg_viewport_* */

   return num_sgprs;
}
static void
allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
{
@@ -184,6 +197,8 @@ allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool h
case MESA_SHADER_VERTEX:
if (!args->is_gs_copy_shader)
user_sgpr_count += count_vs_user_sgprs(args);
if (args->options->key.vs_common_out.as_ngg)
user_sgpr_count += count_ngg_sgprs(args, stage);
break;
case MESA_SHADER_TESS_CTRL:
if (has_previous_stage) {
@@ -192,11 +207,13 @@ allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool h
}
break;
case MESA_SHADER_TESS_EVAL:
if (args->options->key.vs_common_out.as_ngg)
user_sgpr_count += count_ngg_sgprs(args, stage);
break;
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
if (args->options->key.vs_common_out.as_ngg)
user_sgpr_count++; /* NGG GS state */
user_sgpr_count += count_ngg_sgprs(args, stage);
if (previous_stage == MESA_SHADER_VERTEX) {
user_sgpr_count += count_vs_user_sgprs(args);
@@ -356,6 +373,22 @@ declare_tes_input_vgprs(struct radv_shader_args *args)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
}
/* Declare the NGG-specific user SGPR arguments.
 * Must stay in sync with count_ngg_sgprs and set_ngg_sgprs_locs.
 */
static void
declare_ngg_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
{
   /* GS gets its NGG state word regardless of culling support. */
   if (stage == MESA_SHADER_GEOMETRY)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_gs_state);

   if (!args->shader_info->has_ngg_culling)
      return;

   /* Culling settings, then the viewport transform (scale.xy, translate.xy). */
   ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_culling_settings);
   for (unsigned i = 0; i < 2; ++i)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[i]);
   for (unsigned i = 0; i < 2; ++i)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[i]);
}
static void
set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
uint8_t *user_sgpr_idx)
@@ -405,6 +438,24 @@ set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage,
}
}
/* Record the user SGPR locations of the NGG arguments declared by
 * declare_ngg_sgprs, advancing *user_sgpr_idx past them.
 */
static void
set_ngg_sgprs_locs(struct radv_shader_args *args, gl_shader_stage stage, uint8_t *user_sgpr_idx)
{
   if (stage == MESA_SHADER_GEOMETRY) {
      assert(args->ngg_gs_state.used);
      set_loc_shader(args, AC_UD_NGG_GS_STATE, user_sgpr_idx, 1);
   }

   if (!args->shader_info->has_ngg_culling)
      return;

   /* All five culling SGPRs must have been declared. */
   assert(args->ngg_culling_settings.used &&
          args->ngg_viewport_scale[0].used && args->ngg_viewport_scale[1].used &&
          args->ngg_viewport_translate[0].used && args->ngg_viewport_translate[1].used);

   set_loc_shader(args, AC_UD_NGG_CULLING_SETTINGS, user_sgpr_idx, 1);
   set_loc_shader(args, AC_UD_NGG_VIEWPORT, user_sgpr_idx, 4);
}
/* Returns whether the stage is a stage that can be directly before the GS */
static bool
is_pre_gs_stage(gl_shader_stage stage)
@@ -488,6 +539,9 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
if (args->options->explicit_scratch_args) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
}
if (args->options->key.vs_common_out.as_ngg) {
declare_ngg_sgprs(args, stage);
}
declare_vs_input_vgprs(args);
break;
@@ -547,6 +601,9 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
if (args->options->explicit_scratch_args) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
}
if (args->options->key.vs_common_out.as_ngg) {
declare_ngg_sgprs(args, stage);
}
declare_tes_input_vgprs(args);
break;
case MESA_SHADER_GEOMETRY:
@@ -576,7 +633,7 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
}
if (args->options->key.vs_common_out.as_ngg) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_gs_state);
declare_ngg_sgprs(args, stage);
}
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
@@ -669,6 +726,8 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
if (args->options->key.vs_common_out.as_ngg)
set_ngg_sgprs_locs(args, stage, &user_sgpr_idx);
break;
case MESA_SHADER_TESS_CTRL:
set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
@@ -678,6 +737,8 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
case MESA_SHADER_TESS_EVAL:
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
if (args->options->key.vs_common_out.as_ngg)
set_ngg_sgprs_locs(args, stage, &user_sgpr_idx);
break;
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
@@ -688,8 +749,8 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
if (args->ngg_gs_state.used)
set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
if (args->options->key.vs_common_out.as_ngg)
set_ngg_sgprs_locs(args, stage, &user_sgpr_idx);
break;
case MESA_SHADER_FRAGMENT:
break;

View File

@@ -41,6 +41,9 @@ struct radv_shader_args {
/* NGG GS */
struct ac_arg ngg_gs_state;
struct ac_arg ngg_culling_settings;
struct ac_arg ngg_viewport_scale[2];
struct ac_arg ngg_viewport_translate[2];
bool is_gs_copy_shader;
bool is_trap_handler_shader;