freedreno/a6xx: Use LATE_Z with OC + discard

If an occlusion query is active and the FS discards samples, we need to use
LATE_Z so that the sample count is taken after the FS has run.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32869>
This commit is contained in:
Rob Clark
2025-01-03 07:59:58 -08:00
committed by Marge Bot
parent 98cdb9349a
commit fb59a9fb08
6 changed files with 30 additions and 12 deletions

View File

@@ -36,9 +36,6 @@ struct fd5_context {
/* storage for ctx->last.key: */
struct ir3_shader_key last_key;
/* number of active samples-passed queries: */
int samples_passed_queries;
/* cached state about current emitted shader program (3d): */
unsigned max_loc;
};

View File

@@ -154,7 +154,7 @@ fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) assert_dt
* Other bits seem to depend on query state, like if samples-passed
* query is active.
*/
bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0);
bool samples_passed = (ctx->occlusion_queries_active > 0);
OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */
COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) |

View File

@@ -47,7 +47,9 @@ FD_DEFINE_CAST(fd_acc_query_sample, fd5_query_sample);
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
assert_dt
{
struct fd_context *ctx = batch->ctx;
struct fd_ringbuffer *ring = batch->draw;
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
@@ -61,12 +63,14 @@ occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
fd5_event_write(batch, ring, ZPASS_DONE, false);
fd_reset_wfi(batch);
fd5_context(batch->ctx)->samples_passed_queries++;
ctx->occlusion_queries_active++;
}
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
assert_dt
{
struct fd_context *ctx = batch->ctx;
struct fd_ringbuffer *ring = batch->draw;
OUT_PKT7(ring, CP_MEM_WRITE, 4);
@@ -102,7 +106,8 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
fd5_context(batch->ctx)->samples_passed_queries--;
assert(ctx->occlusion_queries_active > 0);
ctx->occlusion_queries_active--;
}
static void

View File

@@ -84,19 +84,19 @@ compute_ztest_mode(struct fd6_emit *emit, bool lrz_valid) assert_dt
return emit->prog->lrz_mask.z_mode;
struct fd_context *ctx = emit->ctx;
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
const struct ir3_shader_variant *fs = emit->fs;
if (!zsa->base.depth_enabled) {
return A6XX_LATE_Z;
} else if ((fs->has_kill || zsa->alpha_test) &&
(zsa->writes_zs || !pfb->zsbuf)) {
/* Slightly odd, but seems like the hw wants us to select
* LATE_Z mode if there is no depth buffer + discard. Either
* that, or when occlusion query is enabled. See:
(zsa->writes_zs || ctx->occlusion_queries_active)) {
/* If occlusion queries are active, we don't want to use EARLY_Z
* since that will count samples that are discarded by fs
*
* dEQP-GLES31.functional.fbo.no_attachments.*
* I'm not entirely sure about the interaction with LRZ, since
* that could discard samples that would otherwise only be
* hidden by a later draw.
*/
return lrz_valid ? A6XX_EARLY_LRZ_LATE_Z : A6XX_LATE_Z;
} else {

View File

@@ -96,6 +96,12 @@ occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
);
}
ctx->occlusion_queries_active++;
/* Just directly bash the gen specific LRZ dirty bit, since we don't
* need to re-emit any other LRZ related state:
*/
ctx->gen_dirty |= FD6_GROUP_LRZ;
}
template <chip CHIP>
@@ -170,6 +176,14 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)),
);
}
assert(ctx->occlusion_queries_active > 0);
ctx->occlusion_queries_active--;
/* Just directly bash the gen specific LRZ dirty bit, since we don't
* need to re-emit any other LRZ related state:
*/
ctx->gen_dirty |= FD6_GROUP_LRZ;
}
static void

View File

@@ -319,6 +319,8 @@ struct fd_context {
*/
bool active_queries dt;
uint32_t occlusion_queries_active dt;
/* shaders used by clear, and gmem->mem blits: */
struct fd_program_stateobj solid_prog; // TODO move to screen?
struct fd_program_stateobj solid_layered_prog;