iris: Rework zeroing of stream output buffer offsets

The previous mechanism was a bit fragile.  We stored the zero offset
in the pre-baked packet, and used a flag to override 0xFFFFFFFF
(append) offsets until our first emit.  We then had to prohibit anyone
from re-emitting the packet by flagging IRIS_DIRTY_SO_BUFFERS, because
that would re-emit the version that zeroes the offset.

Now, we always store 0xFFFFFFFF in the pre-baked packet, and use a
flag to override it to zero on the first emit.  That way, we can
re-emit that packet at any time, and it'll just keep appending.

Reviewed-by: Zoltán Böszörményi <zboszor@gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8964>
This commit is contained in:
Kenneth Graunke
2021-02-02 17:02:05 -08:00
parent e40fafa991
commit 08e04ddd2c
3 changed files with 34 additions and 16 deletions

View File

@@ -537,8 +537,8 @@ struct iris_stream_output_target {
/** Stride (bytes-per-vertex) during this transform feedback operation */
uint16_t stride;
/** Has 3DSTATE_SO_BUFFER actually been emitted, zeroing the offsets? */
bool zeroed;
/** Does the next 3DSTATE_SO_BUFFER need to zero the offsets? */
bool zero_offset;
};
/**

View File

@@ -265,11 +265,8 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info,
if (ice->state.predicate == IRIS_PREDICATE_STATE_DONT_RENDER)
return;
/* We can't safely re-emit 3DSTATE_SO_BUFFERS because it may zero the
* write offsets, changing the behavior.
*/
if (INTEL_DEBUG & DEBUG_REEMIT) {
ice->state.dirty |= IRIS_ALL_DIRTY_FOR_RENDER & ~IRIS_DIRTY_SO_BUFFERS;
ice->state.dirty |= IRIS_ALL_DIRTY_FOR_RENDER;
ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_FOR_RENDER;
}

View File

@@ -3809,13 +3809,18 @@ iris_set_stream_output_targets(struct pipe_context *ctx,
*/
assert(offset == 0 || offset == 0xFFFFFFFF);
/* We might be called by Begin (offset = 0), Pause, then Resume
* (offset = 0xFFFFFFFF) before ever drawing (where these commands
* will actually be sent to the GPU). In this case, we don't want
* to append - we still want to do our initial zeroing.
/* When we're first called with an offset of 0, we want the next
* 3DSTATE_SO_BUFFER packets to reset the offset to the beginning.
* Any further times we emit those packets, we want to use 0xFFFFFFFF
* to continue appending from the current offset.
*
* Note that we might be called by Begin (offset = 0), Pause, then
* Resume (offset = 0xFFFFFFFF) before ever drawing (where these
* commands will actually be sent to the GPU). In this case, we
* don't want to append - we still want to do our initial zeroing.
*/
if (!tgt->zeroed)
offset = 0;
if (offset == 0)
tgt->zero_offset = true;
iris_pack_command(GENX(3DSTATE_SO_BUFFER), so_buffers, sob) {
#if GEN_GEN < 12
@@ -3833,10 +3838,10 @@ iris_set_stream_output_targets(struct pipe_context *ctx,
sob.MOCS = iris_mocs(res->bo, &screen->isl_dev, 0);
sob.SurfaceSize = MAX2(tgt->base.buffer_size / 4, 1) - 1;
sob.StreamOffset = offset;
sob.StreamOutputBufferOffsetAddress =
rw_bo(NULL, iris_resource_bo(tgt->offset.res)->gtt_offset +
tgt->offset.offset, IRIS_DOMAIN_OTHER_WRITE);
sob.StreamOffset = 0xFFFFFFFF; /* not offset, see above */
}
}
@@ -6000,18 +6005,34 @@ iris_upload_dirty_render_state(struct iris_context *ice,
if (ice->state.streamout_active) {
if (dirty & IRIS_DIRTY_SO_BUFFERS) {
iris_batch_emit(batch, genx->so_buffers,
4 * 4 * GENX(3DSTATE_SO_BUFFER_length));
for (int i = 0; i < 4; i++) {
struct iris_stream_output_target *tgt =
(void *) ice->state.so_target[i];
const uint32_t dwords = GENX(3DSTATE_SO_BUFFER_length);
uint32_t *so_buffers = genx->so_buffers + i * dwords;
bool zero_offset = false;
if (tgt) {
tgt->zeroed = true;
zero_offset = tgt->zero_offset;
iris_use_pinned_bo(batch, iris_resource_bo(tgt->base.buffer),
true, IRIS_DOMAIN_OTHER_WRITE);
iris_use_pinned_bo(batch, iris_resource_bo(tgt->offset.res),
true, IRIS_DOMAIN_OTHER_WRITE);
}
if (zero_offset) {
/* Skip the last DWord which contains "Stream Offset" of
* 0xFFFFFFFF and instead emit a dword of zero directly.
*/
STATIC_ASSERT(GENX(3DSTATE_SO_BUFFER_StreamOffset_start) ==
32 * (dwords - 1));
const uint32_t zero = 0;
iris_batch_emit(batch, so_buffers, 4 * (dwords - 1));
iris_batch_emit(batch, &zero, sizeof(zero));
tgt->zero_offset = false;
} else {
iris_batch_emit(batch, so_buffers, 4 * dwords);
}
}
}