anv: Use different BOs for different scratch sizes and stages

This solves a race condition where we can end up having different stages stomp on each other because they're all trying to scratch in the same BO but they have different views of its layout. Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: "12.0" <mesa-stable@lists.freedesktop.org>
2016-06-16 15:26:54 -07:00
parent 45c0f60999
commit c2f2c8e407
7 changed files with 32 additions and 55 deletions
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -878,7 +878,7 @@ VkResult anv_CreateDevice(

   anv_bo_init_new(&device->workaround_bo, device, 1024);

-   anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
+   anv_scratch_pool_init(device, &device->scratch_pool);

   anv_queue_init(device, &device->queue);

@@ -947,7 +947,7 @@ void anv_DestroyDevice(
   anv_block_pool_finish(&device->instruction_block_pool);
   anv_state_pool_finish(&device->surface_state_pool);
   anv_block_pool_finish(&device->surface_state_block_pool);
-   anv_block_pool_finish(&device->scratch_block_pool);
+   anv_scratch_pool_finish(device, &device->scratch_pool);

   close(device->fd);

--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -397,22 +397,8 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
                                const struct brw_stage_prog_data *prog_data,
                                struct anv_pipeline_bind_map *map)
 {
-   struct brw_device_info *devinfo = &pipeline->device->info;
-   uint32_t max_threads[] = {
-      [MESA_SHADER_VERTEX]                  = devinfo->max_vs_threads,
-      [MESA_SHADER_TESS_CTRL]               = devinfo->max_hs_threads,
-      [MESA_SHADER_TESS_EVAL]               = devinfo->max_ds_threads,
-      [MESA_SHADER_GEOMETRY]                = devinfo->max_gs_threads,
-      [MESA_SHADER_FRAGMENT]                = devinfo->max_wm_threads,
-      [MESA_SHADER_COMPUTE]                 = devinfo->max_cs_threads,
-   };
-
   pipeline->prog_data[stage] = prog_data;
   pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
-   pipeline->scratch_start[stage] = pipeline->total_scratch;
-   pipeline->total_scratch =
-      align_u32(pipeline->total_scratch, 1024) +
-      prog_data->total_scratch * max_threads[stage];
   pipeline->bindings[stage] = *map;
 }

@@ -1176,7 +1162,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    * of various prog_data pointers.  Make them NULL by default.
    */
   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));

   pipeline->vs_simd8 = NO_KERNEL;
@@ -1185,7 +1170,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
   pipeline->ps_ksp0 = NO_KERNEL;

   pipeline->active_stages = 0;
-   pipeline->total_scratch = 0;

   const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
   struct anv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
@@ -1278,10 +1262,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
   if (extra && extra->use_rectlist)
      pipeline->topology = _3DPRIM_RECTLIST;

-   while (anv_block_pool_size(&device->scratch_block_pool) <
-          pipeline->total_scratch)
-      anv_block_pool_alloc(&device->scratch_block_pool);
-
   return VK_SUCCESS;
 }

--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -711,7 +711,7 @@ struct anv_device {

    struct anv_queue                            queue;

-    struct anv_block_pool                       scratch_block_pool;
+    struct anv_scratch_pool                     scratch_pool;

    uint32_t                                    default_mocs;

@@ -1471,8 +1471,6 @@ struct anv_pipeline {
   bool                                         needs_data_cache;

   const struct brw_stage_prog_data *           prog_data[MESA_SHADER_STAGES];
-   uint32_t                                     scratch_start[MESA_SHADER_STAGES];
-   uint32_t                                     total_scratch;
   struct {
      uint32_t                                  start[MESA_SHADER_GEOMETRY + 1];
      uint32_t                                  size[MESA_SHADER_GEOMETRY + 1];
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -252,8 +252,10 @@ genX(graphics_pipeline_create)(
         vs.KernelStartPointer         = pipeline->vs_vec4;

         vs.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = NULL,
-            .offset = pipeline->scratch_start[MESA_SHADER_VERTEX],
+            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                         MESA_SHADER_VERTEX,
+                                         vs_prog_data->base.base.total_scratch),
+            .offset = 0,
         };
         vs.PerThreadScratchSpace      = scratch_space(&vs_prog_data->base.base);

@@ -276,8 +278,10 @@ genX(graphics_pipeline_create)(
         gs.KernelStartPointer         = pipeline->gs_kernel;

         gs.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = NULL,
-            .offset = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
+            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                         MESA_SHADER_GEOMETRY,
+                                         gs_prog_data->base.base.total_scratch),
+            .offset = 0,
         };
         gs.PerThreadScratchSpace      = scratch_space(&gs_prog_data->base.base);

@@ -338,8 +342,10 @@ genX(graphics_pipeline_create)(
         ps.KernelStartPointer0           = pipeline->ps_ksp0;

         ps.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = NULL,
-            .offset = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
+            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                         MESA_SHADER_FRAGMENT,
+                                         wm_prog_data->base.total_scratch),
+            .offset = 0,
         };
         ps.PerThreadScratchSpace         = scratch_space(&wm_prog_data->base);
         ps.MaximumNumberofThreads        = device->info.max_wm_threads - 1;
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -361,8 +361,10 @@ genX(graphics_pipeline_create)(
         gs.ExpectedVertexCount     = gs_prog_data->vertices_in;

         gs.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = NULL,
-            .offset = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
+            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                         MESA_SHADER_GEOMETRY,
+                                         gs_prog_data->base.base.total_scratch),
+            .offset = 0,
         };
         gs.PerThreadScratchSpace   = scratch_space(&gs_prog_data->base.base);
         gs.OutputVertexSize        = gs_prog_data->output_vertex_size_hwords * 2 - 1;
@@ -431,8 +433,10 @@ genX(graphics_pipeline_create)(
         vs.SoftwareExceptionEnable       = false;

         vs.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = NULL,
-            .offset = pipeline->scratch_start[MESA_SHADER_VERTEX],
+            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                         MESA_SHADER_VERTEX,
+                                         vs_prog_data->base.base.total_scratch),
+            .offset = 0,
         };
         vs.PerThreadScratchSpace   = scratch_space(&vs_prog_data->base.base);

@@ -483,8 +487,10 @@ genX(graphics_pipeline_create)(
         ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias;

         ps.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = NULL,
-            .offset = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
+            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                         MESA_SHADER_FRAGMENT,
+                                         wm_prog_data->base.total_scratch),
+            .offset = 0,
         };
         ps.PerThreadScratchSpace   = scratch_space(&wm_prog_data->base);

--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -33,12 +33,6 @@ void
 genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
 {
   struct anv_device *device = cmd_buffer->device;
-   struct anv_bo *scratch_bo = NULL;
-
-   cmd_buffer->state.scratch_size =
-      anv_block_pool_size(&device->scratch_block_pool);
-   if (cmd_buffer->state.scratch_size > 0)
-      scratch_bo = &device->scratch_block_pool.bo;

 /* XXX: Do we need this on more than just BDW? */
 #if (GEN_GEN >= 8)
@@ -55,7 +49,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
 #endif

   anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) {
-      sba.GeneralStateBaseAddress = (struct anv_address) { scratch_bo, 0 };
+      sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 };
      sba.GeneralStateMemoryObjectControlState = GENX(MOCS);
      sba.GeneralStateBaseAddressModifyEnable = true;

@@ -503,13 +497,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
   cmd_buffer->state.vb_dirty &= ~vb_emit;

   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) {
-      /* If somebody compiled a pipeline after starting a command buffer the
-       * scratch bo may have grown since we started this cmd buffer (and
-       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
-       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
-      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
-         anv_cmd_buffer_emit_state_base_address(cmd_buffer);
-
      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);

      /* The exact descriptor layout is pulled from the pipeline, so we need
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -64,7 +64,6 @@ genX(compute_pipeline_create)(
    * of various prog_data pointers.  Make them NULL by default.
    */
   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));

   pipeline->vs_simd8 = NO_KERNEL;
@@ -72,7 +71,6 @@ genX(compute_pipeline_create)(
   pipeline->gs_kernel = NO_KERNEL;

   pipeline->active_stages = 0;
-   pipeline->total_scratch = 0;

   pipeline->needs_data_cache = false;

@@ -103,8 +101,10 @@ genX(compute_pipeline_create)(

   anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), vfe) {
      vfe.ScratchSpaceBasePointer = (struct anv_address) {
-         .bo = NULL,
-         .offset = pipeline->scratch_start[MESA_SHADER_COMPUTE],
+         .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                      MESA_SHADER_COMPUTE,
+                                      cs_prog_data->base.total_scratch),
+         .offset = 0,
      };
      vfe.PerThreadScratchSpace  = ffs(cs_prog_data->base.total_scratch / 2048);
 #if GEN_GEN > 7