asahi: Implement ARB_cull_distance
Passes KHR-GL33.cull_distance.* and the piglits. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26614>
This commit is contained in:
@@ -15,3 +15,4 @@ GL_ARB_shader_texture_image_samples on Asahi
|
||||
GL_ARB_indirect_parameters on Asahi
|
||||
GL_ARB_viewport_array on Asahi
|
||||
GL_ARB_fragment_layer_viewport on Asahi
|
||||
GL_ARB_cull_distance on Asahi
|
||||
|
@@ -2502,6 +2502,10 @@ agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings,
|
||||
/* These are always flat-shaded from the FS perspective */
|
||||
key->vs.outputs_flat_shaded |= VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;
|
||||
|
||||
/* The internal cull distance slots are always linearly-interpolated */
|
||||
key->vs.outputs_linear_shaded |=
|
||||
BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, 2);
|
||||
|
||||
assert(!(key->vs.outputs_flat_shaded & key->vs.outputs_linear_shaded));
|
||||
|
||||
/* Smooth 32-bit user bindings go next */
|
||||
@@ -2941,6 +2945,10 @@ agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx,
|
||||
}
|
||||
} else if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
out->has_edgeflags = nir->info.outputs_written & VARYING_BIT_EDGE;
|
||||
out->cull_distance_size = nir->info.cull_distance_array_size;
|
||||
|
||||
if (out->cull_distance_size)
|
||||
NIR_PASS_V(nir, agx_nir_lower_cull_distance_vs);
|
||||
}
|
||||
|
||||
/* Clean up deref gunk after lowering I/O */
|
||||
|
@@ -98,6 +98,7 @@ union agx_varyings {
|
||||
/*
 * Per-shader information recorded before variant compilation.
 * NOTE(review): agx_preprocess_nir appears to fill this in through its `out`
 * parameter; confirm against the full definition of that function.
 */
struct agx_uncompiled_shader_info {
|
||||
/* NOTE(review): presumably a bitmask indexed by varying slot of the inputs
 * that are flat-shaded; the writer is not visible here, confirm.
 */
uint64_t inputs_flat_shaded;
|
||||
/* NOTE(review): presumably the matching bitmask for linearly-interpolated
 * inputs; confirm.
 */
uint64_t inputs_linear_shaded;
|
||||
/* Size of the cull distance array. For vertex shaders this is copied from
 * nir->info.cull_distance_array_size; zero means no cull distance lowering
 * is run.
 */
uint8_t cull_distance_size;
|
||||
/* For vertex shaders: set when outputs_written contains VARYING_BIT_EDGE */
bool has_edgeflags;
|
||||
};
|
||||
|
||||
@@ -243,6 +244,9 @@ void agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx,
|
||||
|
||||
bool agx_nir_lower_discard_zs_emit(nir_shader *s);
|
||||
|
||||
void agx_nir_lower_cull_distance_fs(struct nir_shader *s,
|
||||
unsigned nr_distances);
|
||||
|
||||
bool agx_nir_needs_texture_crawl(nir_instr *instr);
|
||||
|
||||
void agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
|
||||
|
@@ -16,5 +16,6 @@ bool agx_nir_fuse_algebraic_late(struct nir_shader *shader);
|
||||
bool agx_nir_fence_images(struct nir_shader *shader);
|
||||
bool agx_nir_lower_multisampled_image_store(struct nir_shader *s);
|
||||
void agx_nir_lower_layer(struct nir_shader *s);
|
||||
void agx_nir_lower_cull_distance_vs(struct nir_shader *s);
|
||||
|
||||
#endif
|
||||
|
112
src/asahi/compiler/agx_nir_lower_cull_distance.c
Normal file
112
src/asahi/compiler/agx_nir_lower_cull_distance.c
Normal file
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright 2023 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "util/macros.h"
|
||||
#include "agx_compile.h"
|
||||
#include "agx_nir.h"
|
||||
#include "glsl_types.h"
|
||||
|
||||
/*
|
||||
* Lower cull distance to discard. From the spec:
|
||||
*
|
||||
* If the cull distance for any enabled cull half-space is negative for all
|
||||
* of the vertices of the primitive under consideration, the primitive is
|
||||
* discarded.
|
||||
*
|
||||
* We don't have a direct way to read the cull distance at non-provoking
|
||||
* vertices in the fragment shader. Instead, we interpolate the quantity:
|
||||
*
|
||||
* cull distance >= 0.0 ? 1.0 : 0.0
|
||||
*
|
||||
* Then, the discard condition is equivalent to:
|
||||
*
|
||||
* "quantity is zero for all vertices of the primitive"
|
||||
*
|
||||
* which by linearity is equivalent to:
|
||||
*
|
||||
* quantity is zero somewhere in the primitive and quantity has zero
|
||||
* first-order screen space derivatives.
|
||||
*
|
||||
* which we can determine with ease in the fragment shader.
|
||||
*/
|
||||
|
||||
static bool
|
||||
lower_write(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
if (sem.location != VARYING_SLOT_CULL_DIST0)
|
||||
return false;
|
||||
|
||||
nir_instr *clone = nir_instr_clone(b->shader, &intr->instr);
|
||||
nir_intrinsic_instr *lowered = nir_instr_as_intrinsic(clone);
|
||||
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
nir_def *v = nir_b2f32(b, nir_fge_imm(b, intr->src[0].ssa, 0.0));
|
||||
|
||||
nir_builder_instr_insert(b, clone);
|
||||
nir_src_rewrite(&lowered->src[0], v);
|
||||
|
||||
sem.location = VARYING_SLOT_CULL_PRIMITIVE;
|
||||
nir_intrinsic_set_io_semantics(lowered, sem);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
agx_nir_lower_cull_distance_vs(nir_shader *s)
|
||||
{
|
||||
assert(s->info.stage == MESA_SHADER_VERTEX);
|
||||
assert(s->info.outputs_written & VARYING_BIT_CULL_DIST0);
|
||||
|
||||
nir_shader_intrinsics_pass(
|
||||
s, lower_write, nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
|
||||
s->info.outputs_written |=
|
||||
BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE,
|
||||
DIV_ROUND_UP(s->info.cull_distance_array_size, 4));
|
||||
}
|
||||
|
||||
void
|
||||
agx_nir_lower_cull_distance_fs(nir_shader *s, unsigned nr_distances)
|
||||
{
|
||||
assert(s->info.stage == MESA_SHADER_FRAGMENT);
|
||||
assert(nr_distances > 0);
|
||||
|
||||
nir_builder b_ =
|
||||
nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
|
||||
nir_builder *b = &b_;
|
||||
|
||||
/* Test each half-space */
|
||||
nir_def *culled = nir_imm_false(b);
|
||||
|
||||
for (unsigned i = 0; i < nr_distances; ++i) {
|
||||
/* Load the coefficient vector for this half-space. Imaginapple
|
||||
* partial derivatives and the value somewhere.
|
||||
*/
|
||||
nir_def *cf = nir_load_coefficients_agx(
|
||||
b, .component = i & 3,
|
||||
.io_semantics.location = VARYING_SLOT_CULL_PRIMITIVE + (i / 4),
|
||||
.io_semantics.num_slots = nr_distances / 4,
|
||||
.interp_mode = INTERP_MODE_NOPERSPECTIVE);
|
||||
|
||||
/* If the coefficients are identically zero, then the quantity is
|
||||
* zero across the primtive <==> cull distance is negative across the
|
||||
* primitive <==> the primitive is culled.
|
||||
*/
|
||||
culled = nir_ior(b, culled, nir_ball(b, nir_feq_imm(b, cf, 0)));
|
||||
}
|
||||
|
||||
/* Emulate primitive culling by discarding fragments */
|
||||
nir_discard_if(b, culled);
|
||||
|
||||
s->info.inputs_read |= BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE,
|
||||
DIV_ROUND_UP(nr_distances, 4));
|
||||
|
||||
s->info.fs.uses_discard = true;
|
||||
}
|
@@ -9,6 +9,7 @@ libasahi_agx_files = files(
|
||||
'agx_insert_waits.c',
|
||||
'agx_nir_fence_images.c',
|
||||
'agx_nir_lower_address.c',
|
||||
'agx_nir_lower_cull_distance.c',
|
||||
'agx_nir_lower_frag_sidefx.c',
|
||||
'agx_nir_lower_sample_mask.c',
|
||||
'agx_nir_lower_discard_zs_emit.c',
|
||||
|
@@ -1589,6 +1589,8 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
case PIPE_CAP_SHADER_PACK_HALF_FLOAT:
|
||||
case PIPE_CAP_FS_FINE_DERIVATIVE:
|
||||
case PIPE_CAP_CULL_DISTANCE_NOCOMBINE:
|
||||
case PIPE_CAP_NIR_COMPACT_ARRAYS:
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_CLIP_HALFZ:
|
||||
@@ -1647,6 +1649,7 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_DRAW_PARAMETERS:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
|
||||
case PIPE_CAP_CULL_DISTANCE:
|
||||
return 1;
|
||||
case PIPE_CAP_SURFACE_SAMPLE_COUNT:
|
||||
/* TODO: MSRTT */
|
||||
|
@@ -1817,6 +1817,12 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
|
||||
NIR_PASS_V(nir, nir_lower_clip_fs, key->clip_plane_enable, false);
|
||||
}
|
||||
|
||||
/* Similarly for cull distance lowering */
|
||||
if (key->cull_distance_size) {
|
||||
NIR_PASS_V(nir, agx_nir_lower_cull_distance_fs,
|
||||
key->cull_distance_size);
|
||||
}
|
||||
|
||||
/* Discards must be lowered before lowering MSAA to handle discards */
|
||||
NIR_PASS_V(nir, agx_nir_lower_discard_zs_emit);
|
||||
|
||||
@@ -2280,8 +2286,9 @@ agx_update_fs(struct agx_batch *batch)
|
||||
* sample_mask: SAMPLE_MASK
|
||||
* reduced_prim: PRIM
|
||||
*/
|
||||
if (!(ctx->dirty & (AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | AGX_DIRTY_BLEND |
|
||||
AGX_DIRTY_SAMPLE_MASK | AGX_DIRTY_PRIM)))
|
||||
if (!(ctx->dirty &
|
||||
(AGX_DIRTY_VS_PROG | AGX_DIRTY_FS_PROG | AGX_DIRTY_RS |
|
||||
AGX_DIRTY_BLEND | AGX_DIRTY_SAMPLE_MASK | AGX_DIRTY_PRIM)))
|
||||
return false;
|
||||
|
||||
unsigned nr_samples = util_framebuffer_get_num_samples(&batch->key);
|
||||
@@ -2289,6 +2296,8 @@ agx_update_fs(struct agx_batch *batch)
|
||||
|
||||
struct asahi_fs_shader_key key = {
|
||||
.nr_cbufs = batch->key.nr_cbufs,
|
||||
.cull_distance_size =
|
||||
ctx->stage[MESA_SHADER_VERTEX].shader->info.cull_distance_size,
|
||||
.clip_plane_enable = ctx->rast->base.clip_plane_enable,
|
||||
.nr_samples = nr_samples,
|
||||
.layered = util_framebuffer_get_num_layers(&batch->key) > 1,
|
||||
|
@@ -385,6 +385,7 @@ struct asahi_fs_shader_key {
|
||||
*/
|
||||
bool api_sample_mask;
|
||||
|
||||
uint8_t cull_distance_size;
|
||||
uint8_t clip_plane_enable;
|
||||
uint8_t nr_samples;
|
||||
bool multisample;
|
||||
|
Reference in New Issue
Block a user