From fb59a9fb08629b6871a87983e1feabdfd032e5ff Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 3 Jan 2025 07:59:58 -0800 Subject: [PATCH] freedreno/a6xx: Use LATE_Z with OC + discard If occlusion query is used, and the FS discards samples, we need to use LATE_Z in order to get the sample count after the FS. Signed-off-by: Rob Clark Part-of: --- src/gallium/drivers/freedreno/a5xx/fd5_context.h | 3 --- src/gallium/drivers/freedreno/a5xx/fd5_emit.h | 2 +- src/gallium/drivers/freedreno/a5xx/fd5_query.c | 9 +++++++-- src/gallium/drivers/freedreno/a6xx/fd6_emit.cc | 12 ++++++------ src/gallium/drivers/freedreno/a6xx/fd6_query.cc | 14 ++++++++++++++ src/gallium/drivers/freedreno/freedreno_context.h | 2 ++ 6 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_context.h b/src/gallium/drivers/freedreno/a5xx/fd5_context.h index 3da43332675..cfc32af41c3 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_context.h +++ b/src/gallium/drivers/freedreno/a5xx/fd5_context.h @@ -36,9 +36,6 @@ struct fd5_context { /* storage for ctx->last.key: */ struct ir3_shader_key last_key; - /* number of active samples-passed queries: */ - int samples_passed_queries; - /* cached state about current emitted shader program (3d): */ unsigned max_loc; }; diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.h b/src/gallium/drivers/freedreno/a5xx/fd5_emit.h index 29509e0c21b..2c40d464b3c 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.h +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.h @@ -154,7 +154,7 @@ fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) assert_dt * Other bits seem to depend on query state, like if samples-passed * query is active. */ - bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0); + bool samples_passed = (ctx->occlusion_queries_active > 0); OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1); OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */ COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) | diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_query.c b/src/gallium/drivers/freedreno/a5xx/fd5_query.c index cd66ad19d5a..738228a6369 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_query.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_query.c @@ -47,7 +47,9 @@ FD_DEFINE_CAST(fd_acc_query_sample, fd5_query_sample); static void occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch) + assert_dt { + struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->draw; OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1); @@ -61,12 +63,14 @@ occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch) fd5_event_write(batch, ring, ZPASS_DONE, false); fd_reset_wfi(batch); - fd5_context(batch->ctx)->samples_passed_queries++; + ctx->occlusion_queries_active++; } static void occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) + assert_dt { + struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->draw; OUT_PKT7(ring, CP_MEM_WRITE, 4); @@ -102,7 +106,8 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */ OUT_RELOC(ring, query_sample(aq, start)); /* srcC */ - fd5_context(batch->ctx)->samples_passed_queries--; + assert(ctx->occlusion_queries_active > 0); + ctx->occlusion_queries_active--; } static void diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc index 8ab9b4ce79b..9326503cb0c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc @@ -84,19 +84,19 @@ compute_ztest_mode(struct fd6_emit *emit, bool lrz_valid) assert_dt return emit->prog->lrz_mask.z_mode; struct fd_context *ctx = emit->ctx; - struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa); const struct ir3_shader_variant *fs = emit->fs; if (!zsa->base.depth_enabled) { return A6XX_LATE_Z; } else if ((fs->has_kill || zsa->alpha_test) && - (zsa->writes_zs || !pfb->zsbuf)) { - /* Slightly odd, but seems like the hw wants us to select - * LATE_Z mode if there is no depth buffer + discard. Either - * that, or when occlusion query is enabled. See: + (zsa->writes_zs || ctx->occlusion_queries_active)) { + /* If occlusion queries are active, we don't want to use EARLY_Z + * since that will count samples that are discarded by fs * - * dEQP-GLES31.functional.fbo.no_attachments.* + * I'm not entirely sure about the interaction with LRZ, since + * that could discard samples that would otherwise only be + * hidden by a later draw. */ return lrz_valid ? A6XX_EARLY_LRZ_LATE_Z : A6XX_LATE_Z; } else { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_query.cc b/src/gallium/drivers/freedreno/a6xx/fd6_query.cc index 1941195b9b3..9f6a7c77db1 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_query.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_query.cc @@ -96,6 +96,12 @@ occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch) ); } + ctx->occlusion_queries_active++; + + /* Just directly bash the gen specific LRZ dirty bit, since we don't + * need to re-emit any other LRZ related state: + */ + ctx->gen_dirty |= FD6_GROUP_LRZ; } template @@ -170,6 +176,14 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)), ); } + + assert(ctx->occlusion_queries_active > 0); + ctx->occlusion_queries_active--; + + /* Just directly bash the gen specific LRZ dirty bit, since we don't + * need to re-emit any other LRZ related state: + */ + ctx->gen_dirty |= FD6_GROUP_LRZ; } static void diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 5dd6e5f5e75..d7b53280f87 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -319,6 +319,8 @@ struct fd_context { */ bool active_queries dt; + uint32_t occlusion_queries_active dt; + /* shaders used by clear, and gmem->mem blits: */ struct fd_program_stateobj solid_prog; // TODO move to screen? struct fd_program_stateobj solid_layered_prog;