anv: Implement VK_EXT_transform_feedback on Gen7

Things work a little differently on Gen7 than they do on Gen8+.  In
particular, SOBufferEnable lives in 3DSTATE_STREAMOUT but BufferPitch
lives in 3DSTATE_SO_BUFFER.  This leaves us having to marshal data
around a bit more than we did on Gen8.  Still, it's not too bad.

Normally, I don't spend much time on Gen7, but XFB just became a hard
requirement for DXVK, so DXVK stopped working for all our Haswell users.
Let's get them happily playing their games again. 😸

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3532
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6997>
This commit is contained in:
Jason Ekstrand
2020-10-04 09:15:36 -05:00
committed by Marge Bot
parent 9831888b68
commit d82826ad44
5 changed files with 38 additions and 13 deletions

View File

@@ -2126,7 +2126,9 @@ void anv_GetPhysicalDeviceProperties2(
props->transformFeedbackQueries = true;
props->transformFeedbackStreamsLinesTriangles = false;
props->transformFeedbackRasterizationStreamSelect = false;
props->transformFeedbackDraw = true;
/* This requires MI_MATH */
props->transformFeedbackDraw = pdevice->info.is_haswell ||
pdevice->info.gen >= 8;
break;
}

View File

@@ -163,7 +163,7 @@ EXTENSIONS = [
Extension('VK_EXT_shader_viewport_index_layer', 1, True),
Extension('VK_EXT_subgroup_size_control', 2, True),
Extension('VK_EXT_texel_buffer_alignment', 1, True),
Extension('VK_EXT_transform_feedback', 1, 'device->info.gen >= 8'),
Extension('VK_EXT_transform_feedback', 1, True),
Extension('VK_EXT_vertex_attribute_divisor', 3, True),
Extension('VK_EXT_ycbcr_image_arrays', 1, True),
Extension('VK_ANDROID_external_memory_android_hardware_buffer', 3, 'ANDROID'),

View File

@@ -3471,6 +3471,7 @@ struct anv_graphics_pipeline {
uint32_t sf[7];
uint32_t depth_stencil_state[3];
uint32_t clip[4];
uint32_t xfb_bo_pitch[4];
} gen7;
struct {

View File

@@ -3424,8 +3424,9 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->state.gfx.vb_dirty &= ~vb_emit;
#if GEN_GEN >= 8
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) {
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) ||
(GEN_GEN == 7 && (cmd_buffer->state.gfx.dirty &
ANV_CMD_DIRTY_PIPELINE))) {
/* We don't need any per-buffer dirty tracking because you're not
* allowed to bind different XFB buffers while XFB is enabled.
*/
@@ -3440,13 +3441,23 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
#endif
if (cmd_buffer->state.xfb_enabled && xfb->buffer && xfb->size != 0) {
sob.SOBufferEnable = true;
sob.MOCS = cmd_buffer->device->isl_dev.mocs.internal,
sob.StreamOffsetWriteEnable = false;
sob.SurfaceBaseAddress = anv_address_add(xfb->buffer->address,
xfb->offset);
#if GEN_GEN >= 8
sob.SOBufferEnable = true;
sob.StreamOffsetWriteEnable = false;
/* Size is in DWords - 1 */
sob.SurfaceSize = DIV_ROUND_UP(xfb->size, 4) - 1;
#else
/* We don't have SOBufferEnable in 3DSTATE_SO_BUFFER on Gen7 so
* we trust in SurfaceEndAddress = SurfaceBaseAddress = 0 (the
* default for an empty SO_BUFFER packet) to disable them.
*/
sob.SurfacePitch = pipeline->gen7.xfb_bo_pitch[idx];
sob.SurfaceEndAddress = anv_address_add(xfb->buffer->address,
xfb->offset + xfb->size);
#endif
}
}
}
@@ -3455,7 +3466,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
if (GEN_GEN >= 10)
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
}
#endif
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->base.batch);

View File

@@ -1370,7 +1370,6 @@ static void
emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
const VkPipelineRasterizationStateCreateInfo *rs_info)
{
#if GEN_GEN >= 8
const struct brw_vue_prog_data *prog_data =
anv_pipeline_get_last_vue_prog_data(pipeline);
const struct brw_vue_map *vue_map = &prog_data->vue_map;
@@ -1382,12 +1381,10 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
xfb_info = pipeline->shaders[MESA_SHADER_TESS_EVAL]->xfb_info;
else
xfb_info = pipeline->shaders[MESA_SHADER_VERTEX]->xfb_info;
#endif
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), so) {
so.RenderingDisable = rs_info->rasterizerDiscardEnable;
#if GEN_GEN >= 8
if (xfb_info) {
so.SOFunctionEnable = true;
so.SOStatisticsEnable = true;
@@ -1397,10 +1394,28 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
so.RenderStreamSelect = stream_info ?
stream_info->rasterizationStream : 0;
#if GEN_GEN >= 8
so.Buffer0SurfacePitch = xfb_info->buffers[0].stride;
so.Buffer1SurfacePitch = xfb_info->buffers[1].stride;
so.Buffer2SurfacePitch = xfb_info->buffers[2].stride;
so.Buffer3SurfacePitch = xfb_info->buffers[3].stride;
#else
pipeline->gen7.xfb_bo_pitch[0] = xfb_info->buffers[0].stride;
pipeline->gen7.xfb_bo_pitch[1] = xfb_info->buffers[1].stride;
pipeline->gen7.xfb_bo_pitch[2] = xfb_info->buffers[2].stride;
pipeline->gen7.xfb_bo_pitch[3] = xfb_info->buffers[3].stride;
/* On Gen7, the SO buffer enables live in 3DSTATE_STREAMOUT which
* is a bit inconvenient because we don't know what buffers will
* actually be enabled until draw time. We do our best here by
* setting them based on buffers_written and we disable them
* as-needed at draw time by setting EndAddress = BaseAddress.
*/
so.SOBufferEnable0 = xfb_info->buffers_written & (1 << 0);
so.SOBufferEnable1 = xfb_info->buffers_written & (1 << 1);
so.SOBufferEnable2 = xfb_info->buffers_written & (1 << 2);
so.SOBufferEnable3 = xfb_info->buffers_written & (1 << 3);
#endif
int urb_entry_read_offset = 0;
int urb_entry_read_length =
@@ -1419,10 +1434,8 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
so.Stream3VertexReadOffset = urb_entry_read_offset;
so.Stream3VertexReadLength = urb_entry_read_length - 1;
}
#endif /* GEN_GEN >= 8 */
}
#if GEN_GEN >= 8
if (xfb_info) {
struct GENX(SO_DECL) so_decl[MAX_XFB_STREAMS][128];
int next_offset[MAX_XFB_BUFFERS] = {0, 0, 0, 0};
@@ -1521,7 +1534,6 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
});
}
}
#endif /* GEN_GEN >= 8 */
}
static uint32_t