anv: Use different BOs for different scratch sizes and stages
This solves a race condition where we can end up having different stages stomp on each other because they're all trying to scratch in the same BO but they have different views of its layout.

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: "12.0" <mesa-stable@lists.freedesktop.org>
This commit is contained in:
@@ -878,7 +878,7 @@ VkResult anv_CreateDevice(
|
||||
|
||||
anv_bo_init_new(&device->workaround_bo, device, 1024);
|
||||
|
||||
anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
|
||||
anv_scratch_pool_init(device, &device->scratch_pool);
|
||||
|
||||
anv_queue_init(device, &device->queue);
|
||||
|
||||
@@ -947,7 +947,7 @@ void anv_DestroyDevice(
|
||||
anv_block_pool_finish(&device->instruction_block_pool);
|
||||
anv_state_pool_finish(&device->surface_state_pool);
|
||||
anv_block_pool_finish(&device->surface_state_block_pool);
|
||||
anv_block_pool_finish(&device->scratch_block_pool);
|
||||
anv_scratch_pool_finish(device, &device->scratch_pool);
|
||||
|
||||
close(device->fd);
|
||||
|
||||
|
@@ -397,22 +397,8 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
|
||||
const struct brw_stage_prog_data *prog_data,
|
||||
struct anv_pipeline_bind_map *map)
|
||||
{
|
||||
struct brw_device_info *devinfo = &pipeline->device->info;
|
||||
uint32_t max_threads[] = {
|
||||
[MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
|
||||
[MESA_SHADER_TESS_CTRL] = devinfo->max_hs_threads,
|
||||
[MESA_SHADER_TESS_EVAL] = devinfo->max_ds_threads,
|
||||
[MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
|
||||
[MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
|
||||
[MESA_SHADER_COMPUTE] = devinfo->max_cs_threads,
|
||||
};
|
||||
|
||||
pipeline->prog_data[stage] = prog_data;
|
||||
pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
|
||||
pipeline->scratch_start[stage] = pipeline->total_scratch;
|
||||
pipeline->total_scratch =
|
||||
align_u32(pipeline->total_scratch, 1024) +
|
||||
prog_data->total_scratch * max_threads[stage];
|
||||
pipeline->bindings[stage] = *map;
|
||||
}
|
||||
|
||||
@@ -1176,7 +1162,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
|
||||
* of various prog_data pointers. Make them NULL by default.
|
||||
*/
|
||||
memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
|
||||
memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
|
||||
memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
|
||||
|
||||
pipeline->vs_simd8 = NO_KERNEL;
|
||||
@@ -1185,7 +1170,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
|
||||
pipeline->ps_ksp0 = NO_KERNEL;
|
||||
|
||||
pipeline->active_stages = 0;
|
||||
pipeline->total_scratch = 0;
|
||||
|
||||
const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
|
||||
struct anv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
|
||||
@@ -1278,10 +1262,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
|
||||
if (extra && extra->use_rectlist)
|
||||
pipeline->topology = _3DPRIM_RECTLIST;
|
||||
|
||||
while (anv_block_pool_size(&device->scratch_block_pool) <
|
||||
pipeline->total_scratch)
|
||||
anv_block_pool_alloc(&device->scratch_block_pool);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
@@ -711,7 +711,7 @@ struct anv_device {
|
||||
|
||||
struct anv_queue queue;
|
||||
|
||||
struct anv_block_pool scratch_block_pool;
|
||||
struct anv_scratch_pool scratch_pool;
|
||||
|
||||
uint32_t default_mocs;
|
||||
|
||||
@@ -1471,8 +1471,6 @@ struct anv_pipeline {
|
||||
bool needs_data_cache;
|
||||
|
||||
const struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES];
|
||||
uint32_t scratch_start[MESA_SHADER_STAGES];
|
||||
uint32_t total_scratch;
|
||||
struct {
|
||||
uint32_t start[MESA_SHADER_GEOMETRY + 1];
|
||||
uint32_t size[MESA_SHADER_GEOMETRY + 1];
|
||||
|
@@ -252,8 +252,10 @@ genX(graphics_pipeline_create)(
|
||||
vs.KernelStartPointer = pipeline->vs_vec4;
|
||||
|
||||
vs.ScratchSpaceBasePointer = (struct anv_address) {
|
||||
.bo = NULL,
|
||||
.offset = pipeline->scratch_start[MESA_SHADER_VERTEX],
|
||||
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
|
||||
MESA_SHADER_VERTEX,
|
||||
vs_prog_data->base.base.total_scratch),
|
||||
.offset = 0,
|
||||
};
|
||||
vs.PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base);
|
||||
|
||||
@@ -276,8 +278,10 @@ genX(graphics_pipeline_create)(
|
||||
gs.KernelStartPointer = pipeline->gs_kernel;
|
||||
|
||||
gs.ScratchSpaceBasePointer = (struct anv_address) {
|
||||
.bo = NULL,
|
||||
.offset = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
|
||||
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
|
||||
MESA_SHADER_GEOMETRY,
|
||||
gs_prog_data->base.base.total_scratch),
|
||||
.offset = 0,
|
||||
};
|
||||
gs.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base);
|
||||
|
||||
@@ -338,8 +342,10 @@ genX(graphics_pipeline_create)(
|
||||
ps.KernelStartPointer0 = pipeline->ps_ksp0;
|
||||
|
||||
ps.ScratchSpaceBasePointer = (struct anv_address) {
|
||||
.bo = NULL,
|
||||
.offset = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
|
||||
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
|
||||
MESA_SHADER_FRAGMENT,
|
||||
wm_prog_data->base.total_scratch),
|
||||
.offset = 0,
|
||||
};
|
||||
ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base);
|
||||
ps.MaximumNumberofThreads = device->info.max_wm_threads - 1;
|
||||
|
@@ -361,8 +361,10 @@ genX(graphics_pipeline_create)(
|
||||
gs.ExpectedVertexCount = gs_prog_data->vertices_in;
|
||||
|
||||
gs.ScratchSpaceBasePointer = (struct anv_address) {
|
||||
.bo = NULL,
|
||||
.offset = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
|
||||
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
|
||||
MESA_SHADER_GEOMETRY,
|
||||
gs_prog_data->base.base.total_scratch),
|
||||
.offset = 0,
|
||||
};
|
||||
gs.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base);
|
||||
gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
|
||||
@@ -431,8 +433,10 @@ genX(graphics_pipeline_create)(
|
||||
vs.SoftwareExceptionEnable = false;
|
||||
|
||||
vs.ScratchSpaceBasePointer = (struct anv_address) {
|
||||
.bo = NULL,
|
||||
.offset = pipeline->scratch_start[MESA_SHADER_VERTEX],
|
||||
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
|
||||
MESA_SHADER_VERTEX,
|
||||
vs_prog_data->base.base.total_scratch),
|
||||
.offset = 0,
|
||||
};
|
||||
vs.PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base);
|
||||
|
||||
@@ -483,8 +487,10 @@ genX(graphics_pipeline_create)(
|
||||
ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias;
|
||||
|
||||
ps.ScratchSpaceBasePointer = (struct anv_address) {
|
||||
.bo = NULL,
|
||||
.offset = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
|
||||
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
|
||||
MESA_SHADER_FRAGMENT,
|
||||
wm_prog_data->base.total_scratch),
|
||||
.offset = 0,
|
||||
};
|
||||
ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base);
|
||||
|
||||
|
@@ -33,12 +33,6 @@ void
|
||||
genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
struct anv_bo *scratch_bo = NULL;
|
||||
|
||||
cmd_buffer->state.scratch_size =
|
||||
anv_block_pool_size(&device->scratch_block_pool);
|
||||
if (cmd_buffer->state.scratch_size > 0)
|
||||
scratch_bo = &device->scratch_block_pool.bo;
|
||||
|
||||
/* XXX: Do we need this on more than just BDW? */
|
||||
#if (GEN_GEN >= 8)
|
||||
@@ -55,7 +49,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
|
||||
#endif
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) {
|
||||
sba.GeneralStateBaseAddress = (struct anv_address) { scratch_bo, 0 };
|
||||
sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 };
|
||||
sba.GeneralStateMemoryObjectControlState = GENX(MOCS);
|
||||
sba.GeneralStateBaseAddressModifyEnable = true;
|
||||
|
||||
@@ -503,13 +497,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
cmd_buffer->state.vb_dirty &= ~vb_emit;
|
||||
|
||||
if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) {
|
||||
/* If somebody compiled a pipeline after starting a command buffer the
|
||||
* scratch bo may have grown since we started this cmd buffer (and
|
||||
* emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
|
||||
* reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
|
||||
if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
|
||||
anv_cmd_buffer_emit_state_base_address(cmd_buffer);
|
||||
|
||||
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
|
||||
|
||||
/* The exact descriptor layout is pulled from the pipeline, so we need
|
||||
|
@@ -64,7 +64,6 @@ genX(compute_pipeline_create)(
|
||||
* of various prog_data pointers. Make them NULL by default.
|
||||
*/
|
||||
memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
|
||||
memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
|
||||
memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
|
||||
|
||||
pipeline->vs_simd8 = NO_KERNEL;
|
||||
@@ -72,7 +71,6 @@ genX(compute_pipeline_create)(
|
||||
pipeline->gs_kernel = NO_KERNEL;
|
||||
|
||||
pipeline->active_stages = 0;
|
||||
pipeline->total_scratch = 0;
|
||||
|
||||
pipeline->needs_data_cache = false;
|
||||
|
||||
@@ -103,8 +101,10 @@ genX(compute_pipeline_create)(
|
||||
|
||||
anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), vfe) {
|
||||
vfe.ScratchSpaceBasePointer = (struct anv_address) {
|
||||
.bo = NULL,
|
||||
.offset = pipeline->scratch_start[MESA_SHADER_COMPUTE],
|
||||
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
|
||||
MESA_SHADER_COMPUTE,
|
||||
cs_prog_data->base.total_scratch),
|
||||
.offset = 0,
|
||||
};
|
||||
vfe.PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048);
|
||||
#if GEN_GEN > 7
|
||||
|
Reference in New Issue
Block a user