ac/nir/ngg: support clipdist culling

Port from radeonsi.

Besides vertex position based primitive culling, clipdist
attribute can also be used to cull a primitive. Normally
it's used by fixed-pipeline, but when NGG we can treate it
as a culling condition to filter out invisible primitive
before fixed-pipeline.

There are two kinds of clipdist:
1. user define a clip plane explicitly by glClipPlane(),
   fixed-pipeline calculate with vertex position to get
   clipdist, then cull. This is the legacy way.
2. Now GLSL define gl_ClipDistance/gl_CullDiatance so that
   user can calculate clipdist in any way he like.

This implementation support both way.

Acked-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17651>
This commit is contained in:
Qiang Yu
2022-07-08 21:35:18 +08:00
committed by Marge Bot
parent 620e62bb39
commit f75452918b
3 changed files with 111 additions and 18 deletions

View File

@@ -131,7 +131,9 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
bool provoking_vtx_last,
bool use_edgeflags,
bool has_prim_query,
uint32_t instance_rate_inputs);
uint32_t instance_rate_inputs,
uint32_t clipdist_enable_mask,
uint32_t user_clip_plane_enable_mask);
void
ac_nir_lower_ngg_gs(nir_shader *shader,

View File

@@ -71,6 +71,13 @@ typedef struct
nir_instr *compact_arg_stores[4];
nir_intrinsic_instr *overwrite_args;
/* clip distance */
nir_variable *clip_vertex_var;
nir_variable *clipdist_neg_mask_var;
unsigned clipdist_enable_mask;
unsigned user_clip_plane_enable_mask;
bool has_clipdist;
} lower_ngg_nogs_state;
typedef struct
@@ -174,10 +181,6 @@ typedef struct
} output_info[VARYING_SLOT_MAX];
} lower_ngg_ms_state;
typedef struct {
nir_variable *pre_cull_position_value_var;
} remove_culling_shader_outputs_state;
/* Per-vertex LDS layout of culling shaders */
enum {
/* Position of the ES vertex (at the beginning for alignment reasons) */
@@ -190,6 +193,8 @@ enum {
lds_es_vertex_accepted = 16,
/* ID of the thread which will export the current thread's vertex */
lds_es_exporter_tid = 17,
/* bit i is set when the i'th clip distance of a vertex is negative */
lds_es_clipdist_neg_mask = 18,
/* Repacked arguments - also listed separately for VS and TES */
lds_es_arg_0 = 20,
@@ -529,10 +534,19 @@ store_var_components(nir_builder *b, nir_variable *var, nir_ssa_def *value,
nir_store_var(b, var, value, writemask);
}
static void
add_clipdist_bit(nir_builder *b, nir_ssa_def *dist, unsigned index, nir_variable *mask)
{
nir_ssa_def *is_neg = nir_flt(b, dist, nir_imm_float(b, 0));
nir_ssa_def *neg_mask = nir_ishl_imm(b, nir_b2i8(b, is_neg), index);
neg_mask = nir_ior(b, neg_mask, nir_load_var(b, mask));
nir_store_var(b, mask, neg_mask, 1);
}
static bool
remove_culling_shader_output(nir_builder *b, nir_instr *instr, void *state)
{
remove_culling_shader_outputs_state *s = (remove_culling_shader_outputs_state *) state;
lower_ngg_nogs_state *s = (lower_ngg_nogs_state *) state;
if (instr->type != nir_instr_type_intrinsic)
return false;
@@ -560,7 +574,24 @@ remove_culling_shader_output(nir_builder *b, nir_instr *instr, void *state)
nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
switch (io_sem.location) {
case VARYING_SLOT_POS:
store_var_components(b, s->pre_cull_position_value_var, store_val, component, writemask);
store_var_components(b, s->position_value_var, store_val, component, writemask);
break;
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CLIP_DIST1: {
unsigned base = io_sem.location == VARYING_SLOT_CLIP_DIST1 ? 4 : 0;
base += component;
/* valid clipdist component mask */
unsigned mask = (s->clipdist_enable_mask >> base) & writemask;
u_foreach_bit(i, mask) {
add_clipdist_bit(b, nir_channel(b, store_val, i), base + i,
s->clipdist_neg_mask_var);
s->has_clipdist = true;
}
break;
}
case VARYING_SLOT_CLIP_VERTEX:
store_var_components(b, s->clip_vertex_var, store_val, component, writemask);
break;
default:
break;
@@ -572,14 +603,10 @@ remove_culling_shader_output(nir_builder *b, nir_instr *instr, void *state)
}
static void
remove_culling_shader_outputs(nir_shader *culling_shader, lower_ngg_nogs_state *nogs_state, nir_variable *pre_cull_position_value_var)
remove_culling_shader_outputs(nir_shader *culling_shader, lower_ngg_nogs_state *nogs_state)
{
remove_culling_shader_outputs_state s = {
.pre_cull_position_value_var = pre_cull_position_value_var,
};
nir_shader_instructions_pass(culling_shader, remove_culling_shader_output,
nir_metadata_block_index | nir_metadata_dominance, &s);
nir_metadata_block_index | nir_metadata_dominance, nogs_state);
/* Remove dead code resulting from the deleted outputs. */
bool progress;
@@ -1163,6 +1190,38 @@ cull_primitive_accepted(nir_builder *b, void *state)
nir_store_shared(b, nir_imm_intN_t(b, 1, 8), s->vtx_addr[vtx], .base = lds_es_vertex_accepted);
}
static void
clipdist_culling_es_part(nir_builder *b, lower_ngg_nogs_state *nogs_state,
nir_ssa_def *es_vertex_lds_addr)
{
/* no gl_ClipDistance used but we have user defined clip plane */
if (nogs_state->user_clip_plane_enable_mask && !nogs_state->has_clipdist) {
/* use gl_ClipVertex if defined */
nir_variable *clip_vertex_var =
b->shader->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX) ?
nogs_state->clip_vertex_var : nogs_state->position_value_var;
nir_ssa_def *clip_vertex = nir_load_var(b, clip_vertex_var);
/* clip against user defined clip planes */
for (unsigned i = 0; i < 8; i++) {
if (!(nogs_state->user_clip_plane_enable_mask & BITFIELD_BIT(i)))
continue;
nir_ssa_def *plane = nir_load_user_clip_plane(b, .ucp_id = i);
nir_ssa_def *dist = nir_fdot(b, clip_vertex, plane);
add_clipdist_bit(b, dist, i, nogs_state->clipdist_neg_mask_var);
}
nogs_state->has_clipdist = true;
}
/* store clipdist_neg_mask to LDS for culling latter in gs thread */
if (nogs_state->has_clipdist) {
nir_ssa_def *mask = nir_load_var(b, nogs_state->clipdist_neg_mask_var);
nir_store_shared(b, mask, es_vertex_lds_addr, .base = lds_es_clipdist_neg_mask);
}
}
static void
add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_cf, lower_ngg_nogs_state *nogs_state)
{
@@ -1201,6 +1260,13 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
nir_local_variable_create(impl, glsl_uint_type(), "repacked_arg_3"),
};
if (nogs_state->clipdist_enable_mask || nogs_state->user_clip_plane_enable_mask) {
nogs_state->clip_vertex_var =
nir_local_variable_create(impl, glsl_vec4_type(), "clip_vertex");
nogs_state->clipdist_neg_mask_var =
nir_local_variable_create(impl, glsl_uint8_t_type(), "clipdist_neg_mask");
}
/* Top part of the culling shader (aka. position shader part)
*
* We clone the full ES shader and emit it here, but we only really care
@@ -1247,7 +1313,7 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
/* Remove all non-position outputs, and put the position output into the variable. */
nir_metadata_preserve(impl, nir_metadata_none);
remove_culling_shader_outputs(b->shader, nogs_state, position_value_var);
remove_culling_shader_outputs(b->shader, nogs_state);
b->cursor = nir_after_cf_list(&impl->body);
/* Run culling algorithms if culling is enabled.
@@ -1275,6 +1341,9 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
/* Clear out the ES accepted flag in LDS */
nir_store_shared(b, nir_imm_zero(b, 1, 8), es_vertex_lds_addr, .align_mul = 4, .base = lds_es_vertex_accepted);
/* For clipdist culling */
clipdist_culling_es_part(b, nogs_state, es_vertex_lds_addr);
}
nir_pop_if(b, if_es_thread);
@@ -1308,8 +1377,23 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
pos[vtx][1] = nir_channel(b, xy, 1);
}
nir_ssa_def *accepted_by_clipdist;
if (nogs_state->has_clipdist) {
nir_ssa_def *clipdist_neg_mask = nir_imm_intN_t(b, 0xff, 8);
for (unsigned vtx = 0; vtx < nogs_state->num_vertices_per_primitives; ++vtx) {
nir_ssa_def *mask =
nir_load_shared(b, 1, 8, nogs_state->vtx_addr[vtx],
.base = lds_es_clipdist_neg_mask);
clipdist_neg_mask = nir_iand(b, clipdist_neg_mask, mask);
}
/* primitive is culled if any plane's clipdist of all vertices are negative */
accepted_by_clipdist = nir_ieq_imm(b, clipdist_neg_mask, 0);
} else {
accepted_by_clipdist = nir_imm_bool(b, true);
}
/* See if the current primitive is accepted */
ac_nir_cull_primitive(b, nir_imm_bool(b, true), pos,
ac_nir_cull_primitive(b, accepted_by_clipdist, pos,
nogs_state->num_vertices_per_primitives,
cull_primitive_accepted, nogs_state);
}
@@ -1414,7 +1498,9 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
bool provoking_vtx_last,
bool use_edgeflags,
bool has_prim_query,
uint32_t instance_rate_inputs)
uint32_t instance_rate_inputs,
uint32_t clipdist_enable_mask,
uint32_t user_clip_plane_enable_mask)
{
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
assert(impl);
@@ -1443,6 +1529,8 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
.max_es_num_vertices = max_num_es_vertices,
.wave_size = wave_size,
.instance_rate_inputs = instance_rate_inputs,
.clipdist_enable_mask = clipdist_enable_mask,
.user_clip_plane_enable_mask = user_clip_plane_enable_mask,
};
const bool need_prim_id_store_shared =
@@ -2087,8 +2175,11 @@ ngg_gs_cull_primitive(nir_builder *b, nir_ssa_def *tid_in_tg, nir_ssa_def *max_v
pos[i][1] = nir_fdiv(b, pos[i][1], pos[i][3]);
}
/* TODO: support clipdist culling in GS */
nir_ssa_def *accepted_by_clipdist = nir_imm_bool(b, true);
nir_ssa_def *accepted = ac_nir_cull_primitive(
b, nir_imm_bool(b, true), pos, s->num_vertices_per_primitive, NULL, NULL);
b, accepted_by_clipdist, pos, s->num_vertices_per_primitive, NULL, NULL);
nir_if *if_rejected = nir_push_if(b, nir_inot(b, accepted));
{

View File

@@ -1334,7 +1334,7 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_
info->workgroup_size, info->wave_size, info->has_ngg_culling,
info->has_ngg_early_prim_export, info->is_ngg_passthrough, export_prim_id,
pl_key->vs.provoking_vtx_last, false, pl_key->primitives_generated_query,
pl_key->vs.instance_rate_inputs);
pl_key->vs.instance_rate_inputs, 0, 0);
/* Increase ESGS ring size so the LLVM binary contains the correct LDS size. */
ngg_stage->info.ngg_info.esgs_ring_size = nir->info.shared_size;