freedreno/a6xx: Use LATE_Z with OC + discard
If an occlusion query is active and the FS discards samples, we need to use LATE_Z so that the sample count is taken after the FS has run.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32869>
This commit is contained in:
@@ -36,9 +36,6 @@ struct fd5_context {
|
||||
/* storage for ctx->last.key: */
|
||||
struct ir3_shader_key last_key;
|
||||
|
||||
/* number of active samples-passed queries: */
|
||||
int samples_passed_queries;
|
||||
|
||||
/* cached state about current emitted shader program (3d): */
|
||||
unsigned max_loc;
|
||||
};
|
||||
|
@@ -154,7 +154,7 @@ fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) assert_dt
|
||||
* Other bits seem to depend on query state, like if samples-passed
|
||||
* query is active.
|
||||
*/
|
||||
bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0);
|
||||
bool samples_passed = (ctx->occlusion_queries_active > 0);
|
||||
OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
|
||||
OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */
|
||||
COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) |
|
||||
|
@@ -47,7 +47,9 @@ FD_DEFINE_CAST(fd_acc_query_sample, fd5_query_sample);
|
||||
|
||||
static void
|
||||
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
|
||||
assert_dt
|
||||
{
|
||||
struct fd_context *ctx = batch->ctx;
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
|
||||
OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
|
||||
@@ -61,12 +63,14 @@ occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
|
||||
fd5_event_write(batch, ring, ZPASS_DONE, false);
|
||||
fd_reset_wfi(batch);
|
||||
|
||||
fd5_context(batch->ctx)->samples_passed_queries++;
|
||||
ctx->occlusion_queries_active++;
|
||||
}
|
||||
|
||||
static void
|
||||
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
|
||||
assert_dt
|
||||
{
|
||||
struct fd_context *ctx = batch->ctx;
|
||||
struct fd_ringbuffer *ring = batch->draw;
|
||||
|
||||
OUT_PKT7(ring, CP_MEM_WRITE, 4);
|
||||
@@ -102,7 +106,8 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
|
||||
OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
|
||||
OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
|
||||
|
||||
fd5_context(batch->ctx)->samples_passed_queries--;
|
||||
assert(ctx->occlusion_queries_active > 0);
|
||||
ctx->occlusion_queries_active--;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -84,19 +84,19 @@ compute_ztest_mode(struct fd6_emit *emit, bool lrz_valid) assert_dt
|
||||
return emit->prog->lrz_mask.z_mode;
|
||||
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
|
||||
struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
|
||||
const struct ir3_shader_variant *fs = emit->fs;
|
||||
|
||||
if (!zsa->base.depth_enabled) {
|
||||
return A6XX_LATE_Z;
|
||||
} else if ((fs->has_kill || zsa->alpha_test) &&
|
||||
(zsa->writes_zs || !pfb->zsbuf)) {
|
||||
/* Slightly odd, but seems like the hw wants us to select
|
||||
* LATE_Z mode if there is no depth buffer + discard. Either
|
||||
* that, or when occlusion query is enabled. See:
|
||||
(zsa->writes_zs || ctx->occlusion_queries_active)) {
|
||||
/* If occlusion queries are active, we don't want to use EARLY_Z
|
||||
* since that will count samples that are discarded by fs
|
||||
*
|
||||
* dEQP-GLES31.functional.fbo.no_attachments.*
|
||||
* I'm not entirely sure about the interaction with LRZ, since
|
||||
* that could discard samples that would otherwise only be
|
||||
* hidden by a later draw.
|
||||
*/
|
||||
return lrz_valid ? A6XX_EARLY_LRZ_LATE_Z : A6XX_LATE_Z;
|
||||
} else {
|
||||
|
@@ -96,6 +96,12 @@ occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
|
||||
);
|
||||
}
|
||||
|
||||
ctx->occlusion_queries_active++;
|
||||
|
||||
/* Just directly bash the gen specific LRZ dirty bit, since we don't
|
||||
* need to re-emit any other LRZ related state:
|
||||
*/
|
||||
ctx->gen_dirty |= FD6_GROUP_LRZ;
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
@@ -170,6 +176,14 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
|
||||
EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)),
|
||||
);
|
||||
}
|
||||
|
||||
assert(ctx->occlusion_queries_active > 0);
|
||||
ctx->occlusion_queries_active--;
|
||||
|
||||
/* Just directly bash the gen specific LRZ dirty bit, since we don't
|
||||
* need to re-emit any other LRZ related state:
|
||||
*/
|
||||
ctx->gen_dirty |= FD6_GROUP_LRZ;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -319,6 +319,8 @@ struct fd_context {
|
||||
*/
|
||||
bool active_queries dt;
|
||||
|
||||
uint32_t occlusion_queries_active dt;
|
||||
|
||||
/* shaders used by clear, and gmem->mem blits: */
|
||||
struct fd_program_stateobj solid_prog; // TODO move to screen?
|
||||
struct fd_program_stateobj solid_layered_prog;
|
||||
|
Reference in New Issue
Block a user