vk: Implement scratch buffers to make spilling work
This commit is contained in:
@@ -248,13 +248,6 @@ really_do_vs_prog(struct brw_context *brw,
|
|||||||
|
|
||||||
ralloc_free(mem_ctx);
|
ralloc_free(mem_ctx);
|
||||||
|
|
||||||
if (stage_prog_data->total_scratch > 0)
|
|
||||||
if (!anv_bo_init_new(&pipeline->vs_scratch_bo,
|
|
||||||
pipeline->device,
|
|
||||||
stage_prog_data->total_scratch))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -535,12 +528,6 @@ really_do_wm_prog(struct brw_context *brw,
|
|||||||
|
|
||||||
ralloc_free(mem_ctx);
|
ralloc_free(mem_ctx);
|
||||||
|
|
||||||
if (prog_data->base.total_scratch > 0)
|
|
||||||
if (!anv_bo_init_new(&pipeline->ps_scratch_bo,
|
|
||||||
pipeline->device,
|
|
||||||
prog_data->base.total_scratch))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -594,13 +581,6 @@ really_do_gs_prog(struct brw_context *brw,
|
|||||||
|
|
||||||
ralloc_free(output.mem_ctx);
|
ralloc_free(output.mem_ctx);
|
||||||
|
|
||||||
if (output.prog_data.base.base.total_scratch) {
|
|
||||||
if (!anv_bo_init_new(&pipeline->gs_scratch_bo,
|
|
||||||
pipeline->device,
|
|
||||||
output.prog_data.base.base.total_scratch))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -684,6 +664,7 @@ fail_on_compile_error(int status, const char *msg)
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct anv_compiler {
|
struct anv_compiler {
|
||||||
|
struct anv_device *device;
|
||||||
struct intel_screen *screen;
|
struct intel_screen *screen;
|
||||||
struct brw_context *brw;
|
struct brw_context *brw;
|
||||||
struct gl_pipeline_object pipeline;
|
struct gl_pipeline_object pipeline;
|
||||||
@@ -710,6 +691,8 @@ anv_compiler_create(struct anv_device *device)
|
|||||||
if (compiler->brw == NULL)
|
if (compiler->brw == NULL)
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
|
compiler->device = device;
|
||||||
|
|
||||||
compiler->brw->optionCache.info = NULL;
|
compiler->brw->optionCache.info = NULL;
|
||||||
compiler->brw->bufmgr = NULL;
|
compiler->brw->bufmgr = NULL;
|
||||||
compiler->brw->gen = devinfo->gen;
|
compiler->brw->gen = devinfo->gen;
|
||||||
@@ -967,6 +950,28 @@ anv_compile_shader_spirv(struct anv_compiler *compiler,
|
|||||||
unreachable("SPIR-V is not supported yet!");
|
unreachable("SPIR-V is not supported yet!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage,
|
||||||
|
struct brw_stage_prog_data *prog_data)
|
||||||
|
{
|
||||||
|
struct brw_device_info *devinfo = &pipeline->device->info;
|
||||||
|
uint32_t max_threads[] = {
|
||||||
|
[VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads,
|
||||||
|
[VK_SHADER_STAGE_TESS_CONTROL] = 0,
|
||||||
|
[VK_SHADER_STAGE_TESS_EVALUATION] = 0,
|
||||||
|
[VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads,
|
||||||
|
[VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads,
|
||||||
|
[VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads,
|
||||||
|
};
|
||||||
|
|
||||||
|
pipeline->prog_data[stage] = prog_data;
|
||||||
|
pipeline->active_stages |= 1 << stage;
|
||||||
|
pipeline->scratch_start[stage] = pipeline->total_scratch;
|
||||||
|
pipeline->total_scratch =
|
||||||
|
ALIGN_U32(pipeline->total_scratch, 1024) +
|
||||||
|
prog_data->total_scratch * max_threads[stage];
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
|
anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
|
||||||
{
|
{
|
||||||
@@ -978,6 +983,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
|
|||||||
* of various prog_data pointers. Make them NULL by default.
|
* of various prog_data pointers. Make them NULL by default.
|
||||||
*/
|
*/
|
||||||
memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
|
memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
|
||||||
|
memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
|
||||||
|
|
||||||
brw->use_rep_send = pipeline->use_repclear;
|
brw->use_rep_send = pipeline->use_repclear;
|
||||||
brw->no_simd8 = pipeline->use_repclear;
|
brw->no_simd8 = pipeline->use_repclear;
|
||||||
@@ -1024,6 +1030,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
|
|||||||
|
|
||||||
bool success;
|
bool success;
|
||||||
pipeline->active_stages = 0;
|
pipeline->active_stages = 0;
|
||||||
|
pipeline->total_scratch = 0;
|
||||||
|
|
||||||
if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) {
|
if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) {
|
||||||
struct brw_vs_prog_key vs_key;
|
struct brw_vs_prog_key vs_key;
|
||||||
@@ -1035,8 +1042,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
|
|||||||
|
|
||||||
success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline);
|
success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline);
|
||||||
fail_if(!success, "do_vs_prog failed\n"); /* report the stage that actually failed (vertex) */
|
fail_if(!success, "do_vs_prog failed\n"); /* report the stage that actually failed (vertex) */
|
||||||
pipeline->prog_data[VK_SHADER_STAGE_VERTEX] = &pipeline->vs_prog_data.base.base;
|
add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
|
||||||
pipeline->active_stages |= VK_SHADER_STAGE_VERTEX_BIT;;
|
&pipeline->vs_prog_data.base.base);
|
||||||
} else {
|
} else {
|
||||||
memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
|
memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
|
||||||
pipeline->vs_simd8 = NO_KERNEL;
|
pipeline->vs_simd8 = NO_KERNEL;
|
||||||
@@ -1053,8 +1060,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
|
|||||||
|
|
||||||
success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline);
|
success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline);
|
||||||
fail_if(!success, "do_gs_prog failed\n");
|
fail_if(!success, "do_gs_prog failed\n");
|
||||||
pipeline->active_stages |= VK_SHADER_STAGE_GEOMETRY_BIT;
|
add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY,
|
||||||
pipeline->prog_data[VK_SHADER_STAGE_GEOMETRY] = &pipeline->gs_prog_data.base.base;
|
&pipeline->gs_prog_data.base.base);
|
||||||
} else {
|
} else {
|
||||||
pipeline->gs_vec4 = NO_KERNEL;
|
pipeline->gs_vec4 = NO_KERNEL;
|
||||||
}
|
}
|
||||||
@@ -1069,8 +1076,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
|
|||||||
|
|
||||||
success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline);
|
success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline);
|
||||||
fail_if(!success, "do_wm_prog failed\n");
|
fail_if(!success, "do_wm_prog failed\n");
|
||||||
pipeline->prog_data[VK_SHADER_STAGE_FRAGMENT] = &pipeline->wm_prog_data.base;
|
add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
|
||||||
pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
|
&pipeline->wm_prog_data.base);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) {
|
if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) {
|
||||||
@@ -1083,12 +1090,16 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
|
|||||||
|
|
||||||
success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline);
|
success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline);
|
||||||
fail_if(!success, "brw_codegen_cs_prog failed\n");
|
fail_if(!success, "brw_codegen_cs_prog failed\n");
|
||||||
pipeline->prog_data[VK_SHADER_STAGE_COMPUTE] = &pipeline->cs_prog_data.base;
|
add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
|
||||||
pipeline->active_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
|
&pipeline->cs_prog_data.base);
|
||||||
}
|
}
|
||||||
|
|
||||||
brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
|
brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
|
||||||
|
|
||||||
|
struct anv_device *device = compiler->device;
|
||||||
|
while (device->scratch_block_pool.bo.size < pipeline->total_scratch)
|
||||||
|
anv_block_pool_alloc(&device->scratch_block_pool);
|
||||||
|
|
||||||
gen7_compute_urb_partition(pipeline);
|
gen7_compute_urb_partition(pipeline);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@@ -404,6 +404,8 @@ VkResult anv_CreateDevice(
|
|||||||
anv_state_pool_init(&device->surface_state_pool,
|
anv_state_pool_init(&device->surface_state_pool,
|
||||||
&device->surface_state_block_pool);
|
&device->surface_state_block_pool);
|
||||||
|
|
||||||
|
anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
|
||||||
|
|
||||||
device->info = *physicalDevice->info;
|
device->info = *physicalDevice->info;
|
||||||
|
|
||||||
device->compiler = anv_compiler_create(device);
|
device->compiler = anv_compiler_create(device);
|
||||||
@@ -2387,9 +2389,14 @@ static void
|
|||||||
anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
|
anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
|
||||||
{
|
{
|
||||||
struct anv_device *device = cmd_buffer->device;
|
struct anv_device *device = cmd_buffer->device;
|
||||||
|
struct anv_bo *scratch_bo = NULL;
|
||||||
|
|
||||||
|
cmd_buffer->scratch_size = device->scratch_block_pool.size;
|
||||||
|
if (cmd_buffer->scratch_size > 0)
|
||||||
|
scratch_bo = &device->scratch_block_pool.bo;
|
||||||
|
|
||||||
anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
|
anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
|
||||||
.GeneralStateBaseAddress = { NULL, 0 },
|
.GeneralStateBaseAddress = { scratch_bo, 0 },
|
||||||
.GeneralStateMemoryObjectControlState = GEN8_MOCS,
|
.GeneralStateMemoryObjectControlState = GEN8_MOCS,
|
||||||
.GeneralStateBaseAddressModifyEnable = true,
|
.GeneralStateBaseAddressModifyEnable = true,
|
||||||
.GeneralStateBufferSize = 0xfffff,
|
.GeneralStateBufferSize = 0xfffff,
|
||||||
@@ -3213,8 +3220,16 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
|
if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
|
||||||
|
/* If somebody compiled a pipeline after starting a command buffer the
|
||||||
|
* scratch bo may have grown since we started this cmd buffer (and
|
||||||
|
* emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
|
||||||
|
* reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
|
||||||
|
if (cmd_buffer->scratch_size < pipeline->total_scratch)
|
||||||
|
anv_cmd_buffer_emit_state_base_address(cmd_buffer);
|
||||||
|
|
||||||
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
|
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
|
||||||
|
}
|
||||||
|
|
||||||
if (cmd_buffer->descriptors_dirty)
|
if (cmd_buffer->descriptors_dirty)
|
||||||
flush_descriptor_sets(cmd_buffer);
|
flush_descriptor_sets(cmd_buffer);
|
||||||
|
@@ -573,8 +573,8 @@ anv_pipeline_create(
|
|||||||
.BindingTableEntryCount = 0,
|
.BindingTableEntryCount = 0,
|
||||||
.ExpectedVertexCount = pipeline->gs_vertex_count,
|
.ExpectedVertexCount = pipeline->gs_vertex_count,
|
||||||
|
|
||||||
.PerThreadScratchSpace = 0,
|
.ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY],
|
||||||
.ScratchSpaceBasePointer = 0,
|
.PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048),
|
||||||
|
|
||||||
.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1,
|
.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1,
|
||||||
.OutputTopology = gs_prog_data->output_topology,
|
.OutputTopology = gs_prog_data->output_topology,
|
||||||
@@ -628,11 +628,8 @@ anv_pipeline_create(
|
|||||||
.AccessesUAV = false,
|
.AccessesUAV = false,
|
||||||
.SoftwareExceptionEnable = false,
|
.SoftwareExceptionEnable = false,
|
||||||
|
|
||||||
/* FIXME: pointer needs to be assigned outside as it aliases
|
.ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX],
|
||||||
* PerThreadScratchSpace.
|
.PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048),
|
||||||
*/
|
|
||||||
.ScratchSpaceBasePointer = 0,
|
|
||||||
.PerThreadScratchSpace = 0,
|
|
||||||
|
|
||||||
.DispatchGRFStartRegisterForURBData =
|
.DispatchGRFStartRegisterForURBData =
|
||||||
vue_prog_data->base.dispatch_grf_start_reg,
|
vue_prog_data->base.dispatch_grf_start_reg,
|
||||||
@@ -676,8 +673,8 @@ anv_pipeline_create(
|
|||||||
.VectorMaskEnable = true,
|
.VectorMaskEnable = true,
|
||||||
.SamplerCount = 1,
|
.SamplerCount = 1,
|
||||||
|
|
||||||
.ScratchSpaceBasePointer = 0,
|
.ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT],
|
||||||
.PerThreadScratchSpace = 0,
|
.PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048),
|
||||||
|
|
||||||
.MaximumNumberofThreadsPerPSD = 64 - 2,
|
.MaximumNumberofThreadsPerPSD = 64 - 2,
|
||||||
.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
|
.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
|
||||||
@@ -757,11 +754,13 @@ VkResult anv_CreateComputePipeline(
|
|||||||
|
|
||||||
anv_compiler_run(device->compiler, pipeline);
|
anv_compiler_run(device->compiler, pipeline);
|
||||||
|
|
||||||
|
const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
|
||||||
|
|
||||||
anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE,
|
anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE,
|
||||||
.ScratchSpaceBasePointer = 0, /* FIXME: Scratch bo, this should be a reloc? */
|
.ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_COMPUTE], /* compute stage's own scratch offset, not the fragment stage's */
|
||||||
.StackSize = 0,
|
.PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048),
|
||||||
.PerThreadScratchSpace = 0,
|
|
||||||
.ScratchSpaceBasePointerHigh = 0,
|
.ScratchSpaceBasePointerHigh = 0,
|
||||||
|
.StackSize = 0,
|
||||||
|
|
||||||
.MaximumNumberofThreads = device->info.max_cs_threads - 1,
|
.MaximumNumberofThreads = device->info.max_cs_threads - 1,
|
||||||
.NumberofURBEntries = 2,
|
.NumberofURBEntries = 2,
|
||||||
|
@@ -384,6 +384,8 @@ struct anv_device {
|
|||||||
|
|
||||||
struct anv_queue queue;
|
struct anv_queue queue;
|
||||||
|
|
||||||
|
struct anv_block_pool scratch_block_pool;
|
||||||
|
|
||||||
struct anv_compiler * compiler;
|
struct anv_compiler * compiler;
|
||||||
struct anv_aub_writer * aub_writer;
|
struct anv_aub_writer * aub_writer;
|
||||||
pthread_mutex_t mutex;
|
pthread_mutex_t mutex;
|
||||||
@@ -655,6 +657,7 @@ struct anv_cmd_buffer {
|
|||||||
uint32_t dirty;
|
uint32_t dirty;
|
||||||
uint32_t compute_dirty;
|
uint32_t compute_dirty;
|
||||||
uint32_t descriptors_dirty;
|
uint32_t descriptors_dirty;
|
||||||
|
uint32_t scratch_size;
|
||||||
struct anv_pipeline * pipeline;
|
struct anv_pipeline * pipeline;
|
||||||
struct anv_pipeline * compute_pipeline;
|
struct anv_pipeline * compute_pipeline;
|
||||||
struct anv_framebuffer * framebuffer;
|
struct anv_framebuffer * framebuffer;
|
||||||
@@ -696,6 +699,8 @@ struct anv_pipeline {
|
|||||||
struct brw_gs_prog_data gs_prog_data;
|
struct brw_gs_prog_data gs_prog_data;
|
||||||
struct brw_cs_prog_data cs_prog_data;
|
struct brw_cs_prog_data cs_prog_data;
|
||||||
struct brw_stage_prog_data * prog_data[VK_NUM_SHADER_STAGE];
|
struct brw_stage_prog_data * prog_data[VK_NUM_SHADER_STAGE];
|
||||||
|
uint32_t scratch_start[VK_NUM_SHADER_STAGE];
|
||||||
|
uint32_t total_scratch;
|
||||||
struct {
|
struct {
|
||||||
uint32_t vs_start;
|
uint32_t vs_start;
|
||||||
uint32_t vs_size;
|
uint32_t vs_size;
|
||||||
@@ -705,11 +710,6 @@ struct anv_pipeline {
|
|||||||
uint32_t nr_gs_entries;
|
uint32_t nr_gs_entries;
|
||||||
} urb;
|
} urb;
|
||||||
|
|
||||||
struct anv_bo vs_scratch_bo;
|
|
||||||
struct anv_bo ps_scratch_bo;
|
|
||||||
struct anv_bo gs_scratch_bo;
|
|
||||||
struct anv_bo cs_scratch_bo;
|
|
||||||
|
|
||||||
uint32_t active_stages;
|
uint32_t active_stages;
|
||||||
struct anv_state_stream program_stream;
|
struct anv_state_stream program_stream;
|
||||||
struct anv_state blend_state;
|
struct anv_state blend_state;
|
||||||
|
Reference in New Issue
Block a user