anv/pipeline: Lower pipeline layouts etc. after linking
This allows us to use the link-optimized shader for determining binding table layouts and, more importantly, URB layouts. For apps running on DXVK, this is extremely important as DXVK likes to declare max-size inputs and outputs and this lets us massively shrink our URB space requirements. VkPipeline-db results (Batman pipelines only) on KBL: total instructions in shared programs: 820403 -> 790008 (-3.70%) instructions in affected programs: 273759 -> 243364 (-11.10%) helped: 622 HURT: 42 total spills in shared programs: 8449 -> 5212 (-38.31%) spills in affected programs: 3427 -> 190 (-94.46%) helped: 607 HURT: 2 total fills in shared programs: 11638 -> 6067 (-47.87%) fills in affected programs: 5879 -> 308 (-94.76%) helped: 606 HURT: 3 Looking at shaders by hand, it makes the URB between TCS and TES go from containing 32 per-vertex varyings per tessellation shader pair to a more reasonable 8-12. For a 3-vertex patch, that's at least half the URB space no matter how big the patch section is. Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
This commit is contained in:
@@ -472,24 +472,17 @@ anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
|
|||||||
_mesa_sha1_final(&ctx, sha1_out);
|
_mesa_sha1_final(&ctx, sha1_out);
|
||||||
}
|
}
|
||||||
|
|
||||||
static nir_shader *
|
static void
|
||||||
anv_pipeline_compile(struct anv_pipeline *pipeline,
|
anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
|
||||||
void *mem_ctx,
|
void *mem_ctx,
|
||||||
struct anv_pipeline_layout *layout,
|
struct anv_pipeline_stage *stage,
|
||||||
struct anv_pipeline_stage *stage,
|
struct anv_pipeline_layout *layout)
|
||||||
struct brw_stage_prog_data *prog_data,
|
|
||||||
struct anv_pipeline_bind_map *map)
|
|
||||||
{
|
{
|
||||||
const struct brw_compiler *compiler =
|
const struct brw_compiler *compiler =
|
||||||
pipeline->device->instance->physicalDevice.compiler;
|
pipeline->device->instance->physicalDevice.compiler;
|
||||||
|
|
||||||
nir_shader *nir = anv_shader_compile_to_nir(pipeline, mem_ctx,
|
struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
|
||||||
stage->module,
|
nir_shader *nir = stage->nir;
|
||||||
stage->entrypoint,
|
|
||||||
stage->stage,
|
|
||||||
stage->spec_info);
|
|
||||||
if (nir == NULL)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
|
NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
|
||||||
|
|
||||||
@@ -531,15 +524,17 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
|
|||||||
pipeline->needs_data_cache = true;
|
pipeline->needs_data_cache = true;
|
||||||
|
|
||||||
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
|
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
|
||||||
if (layout)
|
if (layout) {
|
||||||
anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data, map);
|
anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data,
|
||||||
|
&stage->bind_map);
|
||||||
|
}
|
||||||
|
|
||||||
if (nir->info.stage != MESA_SHADER_COMPUTE)
|
if (nir->info.stage != MESA_SHADER_COMPUTE)
|
||||||
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
|
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
|
||||||
|
|
||||||
assert(nir->num_uniforms == prog_data->nr_params * 4);
|
assert(nir->num_uniforms == prog_data->nr_params * 4);
|
||||||
|
|
||||||
return nir;
|
stage->nir = nir;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -807,16 +802,12 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,
|
|||||||
stage->key.wm.color_outputs_valid = (1 << num_rts) - 1;
|
stage->key.wm.color_outputs_valid = (1 << num_rts) - 1;
|
||||||
|
|
||||||
assert(num_rts <= max_rt);
|
assert(num_rts <= max_rt);
|
||||||
assert(stage->bind_map.surface_count + num_rts <= 256);
|
assert(stage->bind_map.surface_count == 0);
|
||||||
memmove(stage->bind_map.surface_to_descriptor + num_rts,
|
|
||||||
stage->bind_map.surface_to_descriptor,
|
|
||||||
stage->bind_map.surface_count *
|
|
||||||
sizeof(*stage->bind_map.surface_to_descriptor));
|
|
||||||
typed_memcpy(stage->bind_map.surface_to_descriptor,
|
typed_memcpy(stage->bind_map.surface_to_descriptor,
|
||||||
rt_bindings, num_rts);
|
rt_bindings, num_rts);
|
||||||
stage->bind_map.surface_count += num_rts;
|
stage->bind_map.surface_count += num_rts;
|
||||||
|
|
||||||
anv_fill_binding_table(&stage->prog_data.wm.base, num_rts);
|
anv_fill_binding_table(&stage->prog_data.wm.base, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const unsigned *
|
static const unsigned *
|
||||||
@@ -976,10 +967,11 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
|
|||||||
.sampler_to_descriptor = stages[s].sampler_to_descriptor
|
.sampler_to_descriptor = stages[s].sampler_to_descriptor
|
||||||
};
|
};
|
||||||
|
|
||||||
stages[s].nir = anv_pipeline_compile(pipeline, pipeline_ctx, layout,
|
stages[s].nir = anv_shader_compile_to_nir(pipeline, pipeline_ctx,
|
||||||
&stages[s],
|
stages[s].module,
|
||||||
&stages[s].prog_data.base,
|
stages[s].entrypoint,
|
||||||
&stages[s].bind_map);
|
stages[s].stage,
|
||||||
|
stages[s].spec_info);
|
||||||
if (stages[s].nir == NULL) {
|
if (stages[s].nir == NULL) {
|
||||||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
goto fail;
|
goto fail;
|
||||||
@@ -1022,6 +1014,8 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
|
|||||||
|
|
||||||
void *stage_ctx = ralloc_context(NULL);
|
void *stage_ctx = ralloc_context(NULL);
|
||||||
|
|
||||||
|
anv_pipeline_lower_nir(pipeline, stage_ctx, &stages[s], layout);
|
||||||
|
|
||||||
const unsigned *code;
|
const unsigned *code;
|
||||||
switch (s) {
|
switch (s) {
|
||||||
case MESA_SHADER_VERTEX:
|
case MESA_SHADER_VERTEX:
|
||||||
@@ -1141,14 +1135,18 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
|
|||||||
|
|
||||||
void *mem_ctx = ralloc_context(NULL);
|
void *mem_ctx = ralloc_context(NULL);
|
||||||
|
|
||||||
stage.nir = anv_pipeline_compile(pipeline, mem_ctx, layout, &stage,
|
stage.nir = anv_shader_compile_to_nir(pipeline, mem_ctx,
|
||||||
&stage.prog_data.base,
|
stage.module,
|
||||||
&stage.bind_map);
|
stage.entrypoint,
|
||||||
|
stage.stage,
|
||||||
|
stage.spec_info);
|
||||||
if (stage.nir == NULL) {
|
if (stage.nir == NULL) {
|
||||||
ralloc_free(mem_ctx);
|
ralloc_free(mem_ctx);
|
||||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
anv_pipeline_lower_nir(pipeline, mem_ctx, &stage, layout);
|
||||||
|
|
||||||
NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id,
|
NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id,
|
||||||
&stage.prog_data.cs);
|
&stage.prog_data.cs);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user