asahi: Implement ARB_cull_distance

Passes KHR-GL33.cull_distance.* and the piglits.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26614>
This commit is contained in:
Alyssa Rosenzweig
2023-11-29 17:40:42 -04:00
parent ba077d899e
commit c89f0becf7
9 changed files with 142 additions and 2 deletions

View File

@@ -15,3 +15,4 @@ GL_ARB_shader_texture_image_samples on Asahi
GL_ARB_indirect_parameters on Asahi
GL_ARB_viewport_array on Asahi
GL_ARB_fragment_layer_viewport on Asahi
GL_ARB_cull_distance on Asahi

View File

@@ -2502,6 +2502,10 @@ agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings,
/* These are always flat-shaded from the FS perspective */
key->vs.outputs_flat_shaded |= VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;
/* The internal cull distance slots are always linearly-interpolated */
key->vs.outputs_linear_shaded |=
BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, 2);
assert(!(key->vs.outputs_flat_shaded & key->vs.outputs_linear_shaded));
/* Smooth 32-bit user bindings go next */
@@ -2941,6 +2945,10 @@ agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx,
}
} else if (nir->info.stage == MESA_SHADER_VERTEX) {
out->has_edgeflags = nir->info.outputs_written & VARYING_BIT_EDGE;
out->cull_distance_size = nir->info.cull_distance_array_size;
if (out->cull_distance_size)
NIR_PASS_V(nir, agx_nir_lower_cull_distance_vs);
}
/* Clean up deref gunk after lowering I/O */

View File

@@ -98,6 +98,7 @@ union agx_varyings {
struct agx_uncompiled_shader_info {
uint64_t inputs_flat_shaded;
uint64_t inputs_linear_shaded;
uint8_t cull_distance_size;
bool has_edgeflags;
};
@@ -243,6 +244,9 @@ void agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx,
bool agx_nir_lower_discard_zs_emit(nir_shader *s);
void agx_nir_lower_cull_distance_fs(struct nir_shader *s,
unsigned nr_distances);
bool agx_nir_needs_texture_crawl(nir_instr *instr);
void agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,

View File

@@ -16,5 +16,6 @@ bool agx_nir_fuse_algebraic_late(struct nir_shader *shader);
bool agx_nir_fence_images(struct nir_shader *shader);
bool agx_nir_lower_multisampled_image_store(struct nir_shader *s);
void agx_nir_lower_layer(struct nir_shader *s);
void agx_nir_lower_cull_distance_vs(struct nir_shader *s);
#endif

View File

@@ -0,0 +1,112 @@
/*
* Copyright 2023 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "util/macros.h"
#include "agx_compile.h"
#include "agx_nir.h"
#include "glsl_types.h"
/*
* Lower cull distance to discard. From the spec:
*
* If the cull distance for any enabled cull half-space is negative for all
* of the vertices of the primitive under consideration, the primitive is
* discarded.
*
* We don't have a direct way to read the cull distance at non-provoking
* vertices in the fragment shader. Instead, we interpolate the quantity:
*
* cull distance >= 0.0 ? 1.0 : 0.0
*
* Then, the discard condition is equivalent to:
*
* "quantity is zero for all vertices of the primitive"
*
* which by linearity is equivalent to:
*
* quantity is zero somewhere in the primitive and quantity has zero
* first-order screen space derivatives.
*
* which we can determine with ease in the fragment shader.
*/
/*
 * Per-intrinsic callback for the vertex shader lowering.
 *
 * For every store_output that targets VARYING_SLOT_CULL_DIST0, emit a second
 * store right after it that targets VARYING_SLOT_CULL_PRIMITIVE instead and
 * carries the indicator (distance >= 0.0 ? 1.0 : 0.0) in place of the raw
 * distance. The original store is left untouched.
 *
 * Returns true if an extra store was emitted, false if the instruction was
 * not a cull distance store.
 */
static bool
lower_write(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
{
   if (intr->intrinsic != nir_intrinsic_store_output)
      return false;

   nir_io_semantics cull_sem = nir_intrinsic_io_semantics(intr);
   if (cull_sem.location != VARYING_SLOT_CULL_DIST0)
      return false;

   /* Same semantics as the original store, only retargeted to the internal
    * slot.
    */
   nir_io_semantics prim_sem = cull_sem;
   prim_sem.location = VARYING_SLOT_CULL_PRIMITIVE;

   /* Everything new goes immediately after the original store */
   b->cursor = nir_after_instr(&intr->instr);

   nir_def *is_nonnegative = nir_fge_imm(b, intr->src[0].ssa, 0.0);
   nir_def *indicator = nir_b2f32(b, is_nonnegative);

   /* Duplicate the store and insert it before touching its sources; only
    * then swap the stored value for the indicator.
    */
   nir_instr *dup = nir_instr_clone(b->shader, &intr->instr);
   nir_intrinsic_instr *prim_store = nir_instr_as_intrinsic(dup);

   nir_builder_instr_insert(b, dup);
   nir_src_rewrite(&prim_store->src[0], indicator);
   nir_intrinsic_set_io_semantics(prim_store, prim_sem);

   return true;
}
/*
 * Vertex shader half of the cull distance lowering.
 *
 * Runs lower_write over every intrinsic in the shader, then marks the
 * internal VARYING_SLOT_CULL_PRIMITIVE slots as written: one slot per group
 * of up to four cull distances.
 *
 * The shader must be a vertex shader that writes VARYING_SLOT_CULL_DIST0
 * (both are asserted).
 */
void
agx_nir_lower_cull_distance_vs(nir_shader *s)
{
   assert(s->info.stage == MESA_SHADER_VERTEX);
   assert(s->info.outputs_written & VARYING_BIT_CULL_DIST0);

   nir_shader_intrinsics_pass(
      s, lower_write, nir_metadata_block_index | nir_metadata_dominance, NULL);

   /* Four scalar distances are packed into each vec4 slot */
   const unsigned nr_slots = DIV_ROUND_UP(s->info.cull_distance_array_size, 4);

   s->info.outputs_written |=
      BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, nr_slots);
}
/*
 * Fragment shader half of the cull distance lowering.
 *
 * At the very start of the entrypoint, builds a boolean that is true when
 * any of the nr_distances half-spaces culls the primitive, and discards the
 * fragment on that condition. Also records the internal
 * VARYING_SLOT_CULL_PRIMITIVE slots as read and flags the shader as using
 * discard.
 *
 * The shader must be a fragment shader and nr_distances must be nonzero
 * (both are asserted).
 */
void
agx_nir_lower_cull_distance_fs(nir_shader *s, unsigned nr_distances)
{
   assert(s->info.stage == MESA_SHADER_FRAGMENT);
   assert(nr_distances > 0);

   nir_builder builder =
      nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
   nir_builder *b = &builder;

   /* Accumulates "some half-space culls this primitive" */
   nir_def *culled = nir_imm_false(b);

   for (unsigned i = 0; i < nr_distances; ++i) {
      /* Fetch the coefficient vector of the indicator for half-space i. Going
       * by the derivation at the top of this file, it presumably holds the
       * screen-space partial derivatives plus the value at some point.
       *
       * NOTE(review): num_slots truncates to 0 for fewer than 4 distances,
       * whereas inputs_read below rounds up -- confirm this is intended.
       */
      nir_def *cf = nir_load_coefficients_agx(
         b, .component = i % 4,
         .io_semantics.location = VARYING_SLOT_CULL_PRIMITIVE + (i / 4),
         .io_semantics.num_slots = nr_distances / 4,
         .interp_mode = INTERP_MODE_NOPERSPECTIVE);

      /* All-zero coefficients <==> the indicator is zero across the whole
       * primitive <==> the cull distance is negative across the whole
       * primitive <==> the primitive is culled.
       */
      nir_def *is_zero = nir_feq_imm(b, cf, 0);
      nir_def *all_zero = nir_ball(b, is_zero);

      culled = nir_ior(b, culled, all_zero);
   }

   /* There is no primitive to drop here, so emulate culling by discarding
    * the fragments instead.
    */
   nir_discard_if(b, culled);

   /* Four scalar distances are packed into each vec4 slot */
   const unsigned nr_slots = DIV_ROUND_UP(nr_distances, 4);

   s->info.inputs_read |=
      BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, nr_slots);
   s->info.fs.uses_discard = true;
}

View File

@@ -9,6 +9,7 @@ libasahi_agx_files = files(
'agx_insert_waits.c',
'agx_nir_fence_images.c',
'agx_nir_lower_address.c',
'agx_nir_lower_cull_distance.c',
'agx_nir_lower_frag_sidefx.c',
'agx_nir_lower_sample_mask.c',
'agx_nir_lower_discard_zs_emit.c',

View File

@@ -1589,6 +1589,8 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
case PIPE_CAP_SHADER_PACK_HALF_FLOAT:
case PIPE_CAP_FS_FINE_DERIVATIVE:
case PIPE_CAP_CULL_DISTANCE_NOCOMBINE:
case PIPE_CAP_NIR_COMPACT_ARRAYS:
return 1;
case PIPE_CAP_CLIP_HALFZ:
@@ -1647,6 +1649,7 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_CULL_DISTANCE:
return 1;
case PIPE_CAP_SURFACE_SAMPLE_COUNT:
/* TODO: MSRTT */

View File

@@ -1817,6 +1817,12 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
NIR_PASS_V(nir, nir_lower_clip_fs, key->clip_plane_enable, false);
}
/* Similarly for cull distance lowering */
if (key->cull_distance_size) {
NIR_PASS_V(nir, agx_nir_lower_cull_distance_fs,
key->cull_distance_size);
}
/* Discards must be lowering before lowering MSAA to handle discards */
NIR_PASS_V(nir, agx_nir_lower_discard_zs_emit);
@@ -2280,8 +2286,9 @@ agx_update_fs(struct agx_batch *batch)
* sample_mask: SAMPLE_MASK
* reduced_prim: PRIM
*/
if (!(ctx->dirty & (AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | AGX_DIRTY_BLEND |
AGX_DIRTY_SAMPLE_MASK | AGX_DIRTY_PRIM)))
if (!(ctx->dirty &
(AGX_DIRTY_VS_PROG | AGX_DIRTY_FS_PROG | AGX_DIRTY_RS |
AGX_DIRTY_BLEND | AGX_DIRTY_SAMPLE_MASK | AGX_DIRTY_PRIM)))
return false;
unsigned nr_samples = util_framebuffer_get_num_samples(&batch->key);
@@ -2289,6 +2296,8 @@ agx_update_fs(struct agx_batch *batch)
struct asahi_fs_shader_key key = {
.nr_cbufs = batch->key.nr_cbufs,
.cull_distance_size =
ctx->stage[MESA_SHADER_VERTEX].shader->info.cull_distance_size,
.clip_plane_enable = ctx->rast->base.clip_plane_enable,
.nr_samples = nr_samples,
.layered = util_framebuffer_get_num_layers(&batch->key) > 1,

View File

@@ -385,6 +385,7 @@ struct asahi_fs_shader_key {
*/
bool api_sample_mask;
uint8_t cull_distance_size;
uint8_t clip_plane_enable;
uint8_t nr_samples;
bool multisample;