ac/nir/ngg: support clipdist culling
Port from radeonsi. Besides vertex position based primitive culling, clipdist attribute can also be used to cull a primitive. Normally it's used by fixed-pipeline, but when NGG we can treate it as a culling condition to filter out invisible primitive before fixed-pipeline. There are two kinds of clipdist: 1. user define a clip plane explicitly by glClipPlane(), fixed-pipeline calculate with vertex position to get clipdist, then cull. This is the legacy way. 2. Now GLSL define gl_ClipDistance/gl_CullDiatance so that user can calculate clipdist in any way he like. This implementation support both way. Acked-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17651>
This commit is contained in:
@@ -131,7 +131,9 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
|
|||||||
bool provoking_vtx_last,
|
bool provoking_vtx_last,
|
||||||
bool use_edgeflags,
|
bool use_edgeflags,
|
||||||
bool has_prim_query,
|
bool has_prim_query,
|
||||||
uint32_t instance_rate_inputs);
|
uint32_t instance_rate_inputs,
|
||||||
|
uint32_t clipdist_enable_mask,
|
||||||
|
uint32_t user_clip_plane_enable_mask);
|
||||||
|
|
||||||
void
|
void
|
||||||
ac_nir_lower_ngg_gs(nir_shader *shader,
|
ac_nir_lower_ngg_gs(nir_shader *shader,
|
||||||
|
@@ -71,6 +71,13 @@ typedef struct
|
|||||||
|
|
||||||
nir_instr *compact_arg_stores[4];
|
nir_instr *compact_arg_stores[4];
|
||||||
nir_intrinsic_instr *overwrite_args;
|
nir_intrinsic_instr *overwrite_args;
|
||||||
|
|
||||||
|
/* clip distance */
|
||||||
|
nir_variable *clip_vertex_var;
|
||||||
|
nir_variable *clipdist_neg_mask_var;
|
||||||
|
unsigned clipdist_enable_mask;
|
||||||
|
unsigned user_clip_plane_enable_mask;
|
||||||
|
bool has_clipdist;
|
||||||
} lower_ngg_nogs_state;
|
} lower_ngg_nogs_state;
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
@@ -174,10 +181,6 @@ typedef struct
|
|||||||
} output_info[VARYING_SLOT_MAX];
|
} output_info[VARYING_SLOT_MAX];
|
||||||
} lower_ngg_ms_state;
|
} lower_ngg_ms_state;
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
nir_variable *pre_cull_position_value_var;
|
|
||||||
} remove_culling_shader_outputs_state;
|
|
||||||
|
|
||||||
/* Per-vertex LDS layout of culling shaders */
|
/* Per-vertex LDS layout of culling shaders */
|
||||||
enum {
|
enum {
|
||||||
/* Position of the ES vertex (at the beginning for alignment reasons) */
|
/* Position of the ES vertex (at the beginning for alignment reasons) */
|
||||||
@@ -190,6 +193,8 @@ enum {
|
|||||||
lds_es_vertex_accepted = 16,
|
lds_es_vertex_accepted = 16,
|
||||||
/* ID of the thread which will export the current thread's vertex */
|
/* ID of the thread which will export the current thread's vertex */
|
||||||
lds_es_exporter_tid = 17,
|
lds_es_exporter_tid = 17,
|
||||||
|
/* bit i is set when the i'th clip distance of a vertex is negative */
|
||||||
|
lds_es_clipdist_neg_mask = 18,
|
||||||
|
|
||||||
/* Repacked arguments - also listed separately for VS and TES */
|
/* Repacked arguments - also listed separately for VS and TES */
|
||||||
lds_es_arg_0 = 20,
|
lds_es_arg_0 = 20,
|
||||||
@@ -529,10 +534,19 @@ store_var_components(nir_builder *b, nir_variable *var, nir_ssa_def *value,
|
|||||||
nir_store_var(b, var, value, writemask);
|
nir_store_var(b, var, value, writemask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
add_clipdist_bit(nir_builder *b, nir_ssa_def *dist, unsigned index, nir_variable *mask)
|
||||||
|
{
|
||||||
|
nir_ssa_def *is_neg = nir_flt(b, dist, nir_imm_float(b, 0));
|
||||||
|
nir_ssa_def *neg_mask = nir_ishl_imm(b, nir_b2i8(b, is_neg), index);
|
||||||
|
neg_mask = nir_ior(b, neg_mask, nir_load_var(b, mask));
|
||||||
|
nir_store_var(b, mask, neg_mask, 1);
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
remove_culling_shader_output(nir_builder *b, nir_instr *instr, void *state)
|
remove_culling_shader_output(nir_builder *b, nir_instr *instr, void *state)
|
||||||
{
|
{
|
||||||
remove_culling_shader_outputs_state *s = (remove_culling_shader_outputs_state *) state;
|
lower_ngg_nogs_state *s = (lower_ngg_nogs_state *) state;
|
||||||
|
|
||||||
if (instr->type != nir_instr_type_intrinsic)
|
if (instr->type != nir_instr_type_intrinsic)
|
||||||
return false;
|
return false;
|
||||||
@@ -560,7 +574,24 @@ remove_culling_shader_output(nir_builder *b, nir_instr *instr, void *state)
|
|||||||
nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
|
nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
|
||||||
switch (io_sem.location) {
|
switch (io_sem.location) {
|
||||||
case VARYING_SLOT_POS:
|
case VARYING_SLOT_POS:
|
||||||
store_var_components(b, s->pre_cull_position_value_var, store_val, component, writemask);
|
store_var_components(b, s->position_value_var, store_val, component, writemask);
|
||||||
|
break;
|
||||||
|
case VARYING_SLOT_CLIP_DIST0:
|
||||||
|
case VARYING_SLOT_CLIP_DIST1: {
|
||||||
|
unsigned base = io_sem.location == VARYING_SLOT_CLIP_DIST1 ? 4 : 0;
|
||||||
|
base += component;
|
||||||
|
|
||||||
|
/* valid clipdist component mask */
|
||||||
|
unsigned mask = (s->clipdist_enable_mask >> base) & writemask;
|
||||||
|
u_foreach_bit(i, mask) {
|
||||||
|
add_clipdist_bit(b, nir_channel(b, store_val, i), base + i,
|
||||||
|
s->clipdist_neg_mask_var);
|
||||||
|
s->has_clipdist = true;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case VARYING_SLOT_CLIP_VERTEX:
|
||||||
|
store_var_components(b, s->clip_vertex_var, store_val, component, writemask);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
@@ -572,14 +603,10 @@ remove_culling_shader_output(nir_builder *b, nir_instr *instr, void *state)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
remove_culling_shader_outputs(nir_shader *culling_shader, lower_ngg_nogs_state *nogs_state, nir_variable *pre_cull_position_value_var)
|
remove_culling_shader_outputs(nir_shader *culling_shader, lower_ngg_nogs_state *nogs_state)
|
||||||
{
|
{
|
||||||
remove_culling_shader_outputs_state s = {
|
|
||||||
.pre_cull_position_value_var = pre_cull_position_value_var,
|
|
||||||
};
|
|
||||||
|
|
||||||
nir_shader_instructions_pass(culling_shader, remove_culling_shader_output,
|
nir_shader_instructions_pass(culling_shader, remove_culling_shader_output,
|
||||||
nir_metadata_block_index | nir_metadata_dominance, &s);
|
nir_metadata_block_index | nir_metadata_dominance, nogs_state);
|
||||||
|
|
||||||
/* Remove dead code resulting from the deleted outputs. */
|
/* Remove dead code resulting from the deleted outputs. */
|
||||||
bool progress;
|
bool progress;
|
||||||
@@ -1163,6 +1190,38 @@ cull_primitive_accepted(nir_builder *b, void *state)
|
|||||||
nir_store_shared(b, nir_imm_intN_t(b, 1, 8), s->vtx_addr[vtx], .base = lds_es_vertex_accepted);
|
nir_store_shared(b, nir_imm_intN_t(b, 1, 8), s->vtx_addr[vtx], .base = lds_es_vertex_accepted);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
clipdist_culling_es_part(nir_builder *b, lower_ngg_nogs_state *nogs_state,
|
||||||
|
nir_ssa_def *es_vertex_lds_addr)
|
||||||
|
{
|
||||||
|
/* no gl_ClipDistance used but we have user defined clip plane */
|
||||||
|
if (nogs_state->user_clip_plane_enable_mask && !nogs_state->has_clipdist) {
|
||||||
|
/* use gl_ClipVertex if defined */
|
||||||
|
nir_variable *clip_vertex_var =
|
||||||
|
b->shader->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX) ?
|
||||||
|
nogs_state->clip_vertex_var : nogs_state->position_value_var;
|
||||||
|
nir_ssa_def *clip_vertex = nir_load_var(b, clip_vertex_var);
|
||||||
|
|
||||||
|
/* clip against user defined clip planes */
|
||||||
|
for (unsigned i = 0; i < 8; i++) {
|
||||||
|
if (!(nogs_state->user_clip_plane_enable_mask & BITFIELD_BIT(i)))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
nir_ssa_def *plane = nir_load_user_clip_plane(b, .ucp_id = i);
|
||||||
|
nir_ssa_def *dist = nir_fdot(b, clip_vertex, plane);
|
||||||
|
add_clipdist_bit(b, dist, i, nogs_state->clipdist_neg_mask_var);
|
||||||
|
}
|
||||||
|
|
||||||
|
nogs_state->has_clipdist = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* store clipdist_neg_mask to LDS for culling latter in gs thread */
|
||||||
|
if (nogs_state->has_clipdist) {
|
||||||
|
nir_ssa_def *mask = nir_load_var(b, nogs_state->clipdist_neg_mask_var);
|
||||||
|
nir_store_shared(b, mask, es_vertex_lds_addr, .base = lds_es_clipdist_neg_mask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_cf, lower_ngg_nogs_state *nogs_state)
|
add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_cf, lower_ngg_nogs_state *nogs_state)
|
||||||
{
|
{
|
||||||
@@ -1201,6 +1260,13 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
|
|||||||
nir_local_variable_create(impl, glsl_uint_type(), "repacked_arg_3"),
|
nir_local_variable_create(impl, glsl_uint_type(), "repacked_arg_3"),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (nogs_state->clipdist_enable_mask || nogs_state->user_clip_plane_enable_mask) {
|
||||||
|
nogs_state->clip_vertex_var =
|
||||||
|
nir_local_variable_create(impl, glsl_vec4_type(), "clip_vertex");
|
||||||
|
nogs_state->clipdist_neg_mask_var =
|
||||||
|
nir_local_variable_create(impl, glsl_uint8_t_type(), "clipdist_neg_mask");
|
||||||
|
}
|
||||||
|
|
||||||
/* Top part of the culling shader (aka. position shader part)
|
/* Top part of the culling shader (aka. position shader part)
|
||||||
*
|
*
|
||||||
* We clone the full ES shader and emit it here, but we only really care
|
* We clone the full ES shader and emit it here, but we only really care
|
||||||
@@ -1247,7 +1313,7 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
|
|||||||
|
|
||||||
/* Remove all non-position outputs, and put the position output into the variable. */
|
/* Remove all non-position outputs, and put the position output into the variable. */
|
||||||
nir_metadata_preserve(impl, nir_metadata_none);
|
nir_metadata_preserve(impl, nir_metadata_none);
|
||||||
remove_culling_shader_outputs(b->shader, nogs_state, position_value_var);
|
remove_culling_shader_outputs(b->shader, nogs_state);
|
||||||
b->cursor = nir_after_cf_list(&impl->body);
|
b->cursor = nir_after_cf_list(&impl->body);
|
||||||
|
|
||||||
/* Run culling algorithms if culling is enabled.
|
/* Run culling algorithms if culling is enabled.
|
||||||
@@ -1275,6 +1341,9 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
|
|||||||
|
|
||||||
/* Clear out the ES accepted flag in LDS */
|
/* Clear out the ES accepted flag in LDS */
|
||||||
nir_store_shared(b, nir_imm_zero(b, 1, 8), es_vertex_lds_addr, .align_mul = 4, .base = lds_es_vertex_accepted);
|
nir_store_shared(b, nir_imm_zero(b, 1, 8), es_vertex_lds_addr, .align_mul = 4, .base = lds_es_vertex_accepted);
|
||||||
|
|
||||||
|
/* For clipdist culling */
|
||||||
|
clipdist_culling_es_part(b, nogs_state, es_vertex_lds_addr);
|
||||||
}
|
}
|
||||||
nir_pop_if(b, if_es_thread);
|
nir_pop_if(b, if_es_thread);
|
||||||
|
|
||||||
@@ -1308,8 +1377,23 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
|
|||||||
pos[vtx][1] = nir_channel(b, xy, 1);
|
pos[vtx][1] = nir_channel(b, xy, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nir_ssa_def *accepted_by_clipdist;
|
||||||
|
if (nogs_state->has_clipdist) {
|
||||||
|
nir_ssa_def *clipdist_neg_mask = nir_imm_intN_t(b, 0xff, 8);
|
||||||
|
for (unsigned vtx = 0; vtx < nogs_state->num_vertices_per_primitives; ++vtx) {
|
||||||
|
nir_ssa_def *mask =
|
||||||
|
nir_load_shared(b, 1, 8, nogs_state->vtx_addr[vtx],
|
||||||
|
.base = lds_es_clipdist_neg_mask);
|
||||||
|
clipdist_neg_mask = nir_iand(b, clipdist_neg_mask, mask);
|
||||||
|
}
|
||||||
|
/* primitive is culled if any plane's clipdist of all vertices are negative */
|
||||||
|
accepted_by_clipdist = nir_ieq_imm(b, clipdist_neg_mask, 0);
|
||||||
|
} else {
|
||||||
|
accepted_by_clipdist = nir_imm_bool(b, true);
|
||||||
|
}
|
||||||
|
|
||||||
/* See if the current primitive is accepted */
|
/* See if the current primitive is accepted */
|
||||||
ac_nir_cull_primitive(b, nir_imm_bool(b, true), pos,
|
ac_nir_cull_primitive(b, accepted_by_clipdist, pos,
|
||||||
nogs_state->num_vertices_per_primitives,
|
nogs_state->num_vertices_per_primitives,
|
||||||
cull_primitive_accepted, nogs_state);
|
cull_primitive_accepted, nogs_state);
|
||||||
}
|
}
|
||||||
@@ -1414,7 +1498,9 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
|
|||||||
bool provoking_vtx_last,
|
bool provoking_vtx_last,
|
||||||
bool use_edgeflags,
|
bool use_edgeflags,
|
||||||
bool has_prim_query,
|
bool has_prim_query,
|
||||||
uint32_t instance_rate_inputs)
|
uint32_t instance_rate_inputs,
|
||||||
|
uint32_t clipdist_enable_mask,
|
||||||
|
uint32_t user_clip_plane_enable_mask)
|
||||||
{
|
{
|
||||||
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||||
assert(impl);
|
assert(impl);
|
||||||
@@ -1443,6 +1529,8 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
|
|||||||
.max_es_num_vertices = max_num_es_vertices,
|
.max_es_num_vertices = max_num_es_vertices,
|
||||||
.wave_size = wave_size,
|
.wave_size = wave_size,
|
||||||
.instance_rate_inputs = instance_rate_inputs,
|
.instance_rate_inputs = instance_rate_inputs,
|
||||||
|
.clipdist_enable_mask = clipdist_enable_mask,
|
||||||
|
.user_clip_plane_enable_mask = user_clip_plane_enable_mask,
|
||||||
};
|
};
|
||||||
|
|
||||||
const bool need_prim_id_store_shared =
|
const bool need_prim_id_store_shared =
|
||||||
@@ -2087,8 +2175,11 @@ ngg_gs_cull_primitive(nir_builder *b, nir_ssa_def *tid_in_tg, nir_ssa_def *max_v
|
|||||||
pos[i][1] = nir_fdiv(b, pos[i][1], pos[i][3]);
|
pos[i][1] = nir_fdiv(b, pos[i][1], pos[i][3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* TODO: support clipdist culling in GS */
|
||||||
|
nir_ssa_def *accepted_by_clipdist = nir_imm_bool(b, true);
|
||||||
|
|
||||||
nir_ssa_def *accepted = ac_nir_cull_primitive(
|
nir_ssa_def *accepted = ac_nir_cull_primitive(
|
||||||
b, nir_imm_bool(b, true), pos, s->num_vertices_per_primitive, NULL, NULL);
|
b, accepted_by_clipdist, pos, s->num_vertices_per_primitive, NULL, NULL);
|
||||||
|
|
||||||
nir_if *if_rejected = nir_push_if(b, nir_inot(b, accepted));
|
nir_if *if_rejected = nir_push_if(b, nir_inot(b, accepted));
|
||||||
{
|
{
|
||||||
|
@@ -1334,7 +1334,7 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_
|
|||||||
info->workgroup_size, info->wave_size, info->has_ngg_culling,
|
info->workgroup_size, info->wave_size, info->has_ngg_culling,
|
||||||
info->has_ngg_early_prim_export, info->is_ngg_passthrough, export_prim_id,
|
info->has_ngg_early_prim_export, info->is_ngg_passthrough, export_prim_id,
|
||||||
pl_key->vs.provoking_vtx_last, false, pl_key->primitives_generated_query,
|
pl_key->vs.provoking_vtx_last, false, pl_key->primitives_generated_query,
|
||||||
pl_key->vs.instance_rate_inputs);
|
pl_key->vs.instance_rate_inputs, 0, 0);
|
||||||
|
|
||||||
/* Increase ESGS ring size so the LLVM binary contains the correct LDS size. */
|
/* Increase ESGS ring size so the LLVM binary contains the correct LDS size. */
|
||||||
ngg_stage->info.ngg_info.esgs_ring_size = nir->info.shared_size;
|
ngg_stage->info.ngg_info.esgs_ring_size = nir->info.shared_size;
|
||||||
|
Reference in New Issue
Block a user