anv: implement VK_EXT_graphics_pipeline_library

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15637>
commit 3d49cdb71e (parent 0b8a2de2a1)
Author: Lionel Landwerlin
Date: 2022-03-28 15:42:27 +03:00
Committed by: Marge Bot

10 changed files with 920 additions and 175 deletions

@@ -63,3 +63,14 @@ dEQP-VK.dynamic_rendering.suballocation.load_store_op_none.stencil_d32_sfloat_s8
 # New CTS failures in 1.3.5.0
 dEQP-VK.drm_format_modifiers.export_import_fmt_features2.b4g4r4a4_unorm_pack16,Crash
+# Waiting on clarification from https://gitlab.khronos.org/vulkan/vulkan/-/issues/3115
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_geometry_stage_fragment_stage_delayed_destroy,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_fragment_stage,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_tessellation_control_stage_tessellation_evaluation_stage_fragment_stage_no_cache,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_geometry_stage_fragment_stage,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_tessellation_control_stage_tessellation_evaluation_stage_fragment_stage_delayed_destroy,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_fragment_stage_delayed_destroy,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_geometry_stage_fragment_stage_no_cache,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_fragment_stage_no_cache,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_tessellation_control_stage_tessellation_evaluation_stage_fragment_stage,Crash

@@ -431,6 +431,12 @@ void anv_CmdBindPipeline(
          if (modified)
             cmd_buffer->state.push_constants_dirty |= stages;
       }
+
+      if ((gfx_pipeline->fs_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) &&
+          push->fs.msaa_flags != gfx_pipeline->fs_msaa_flags) {
+         push->fs.msaa_flags = gfx_pipeline->fs_msaa_flags;
+         cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
+      }
       break;
    }

@@ -308,6 +308,7 @@ get_device_extensions(const struct anv_physical_device *device,
                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
       .EXT_global_priority_query = device->max_context_priority >=
                                    VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
+      .EXT_graphics_pipeline_library = true,
       .EXT_host_query_reset = true,
       .EXT_image_2d_view_of_3d = true,
       .EXT_image_robustness = true,
@@ -1383,6 +1384,7 @@ void anv_GetPhysicalDeviceFeatures2(
       /* VK_EXT_global_priority_query */
       .globalPriorityQuery = true,
+      .graphicsPipelineLibrary = true,

       /* VK_KHR_fragment_shading_rate */
       .pipelineFragmentShadingRate = true,
@@ -2193,6 +2195,14 @@ void anv_GetPhysicalDeviceProperties2(
          break;
       }
+
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT: {
+         VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT *props =
+            (VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT *)ext;
+         props->graphicsPipelineLibraryFastLinking = true;
+         props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
          VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
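For context, this is roughly how an application drives the extension advertised above; a minimal sketch using only spec-defined Vulkan names, not code from this commit (the device, cache and *_lib handles are hypothetical, error handling omitted):

   /* Build one library per pipeline part (here: pre-rasterization shaders). */
   VkGraphicsPipelineLibraryCreateInfoEXT lib_info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT,
      .flags = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT,
   };
   VkGraphicsPipelineCreateInfo lib_create = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .pNext = &lib_info,
      .flags = VK_PIPELINE_CREATE_LIBRARY_BIT_KHR,
      /* ... only the stages/state belonging to this part ... */
   };
   vkCreateGraphicsPipelines(device, cache, 1, &lib_create, NULL, &pre_raster_lib);

   /* Link four part libraries into an executable pipeline. With
    * graphicsPipelineLibraryFastLinking = true this link is cheap. */
   VkPipeline parts[4] = { vertex_input_lib, pre_raster_lib, fragment_lib, output_lib };
   VkPipelineLibraryCreateInfoKHR link_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,
      .libraryCount = 4,
      .pLibraries = parts,
   };
   VkGraphicsPipelineCreateInfo link_create = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .pNext = &link_info,
   };
   vkCreateGraphicsPipelines(device, cache, 1, &link_create, NULL, &pipeline);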

@@ -74,6 +74,7 @@ void anv_nir_apply_pipeline_layout(nir_shader *shader,
 void anv_nir_compute_push_layout(nir_shader *nir,
                                  const struct anv_physical_device *pdevice,
                                  bool robust_buffer_access,
+                                 bool fragment_dynamic,
                                  struct brw_stage_prog_data *prog_data,
                                  struct anv_pipeline_bind_map *map,
                                  void *mem_ctx);

@@ -32,6 +32,7 @@ void
 anv_nir_compute_push_layout(nir_shader *nir,
                             const struct anv_physical_device *pdevice,
                             bool robust_buffer_access,
+                            bool fragment_dynamic,
                             struct brw_stage_prog_data *prog_data,
                             struct anv_pipeline_bind_map *map,
                             void *mem_ctx)
@@ -102,6 +103,14 @@ anv_nir_compute_push_layout(nir_shader *nir,
       push_end = MAX2(push_end, push_reg_mask_end);
    }

+   if (nir->info.stage == MESA_SHADER_FRAGMENT && fragment_dynamic) {
+      const uint32_t fs_msaa_flags_start =
+         offsetof(struct anv_push_constants, fs.msaa_flags);
+      const uint32_t fs_msaa_flags_end = fs_msaa_flags_start + sizeof(uint32_t);
+      push_start = MIN2(push_start, fs_msaa_flags_start);
+      push_end = MAX2(push_end, fs_msaa_flags_end);
+   }
+
    if (nir->info.stage == MESA_SHADER_COMPUTE && devinfo->verx10 < 125) {
       /* For compute shaders, we always have to have the subgroup ID. The
        * back-end compiler will "helpfully" add it for us in the last push
@@ -275,6 +284,17 @@ anv_nir_compute_push_layout(nir_shader *nir,
       map->push_ranges[0] = push_constant_range;
    }

+   if (nir->info.stage == MESA_SHADER_FRAGMENT && fragment_dynamic) {
+      struct brw_wm_prog_data *wm_prog_data =
+         container_of(prog_data, struct brw_wm_prog_data, base);
+
+      const uint32_t fs_msaa_flags_offset =
+         offsetof(struct anv_push_constants, fs.msaa_flags);
+      assert(fs_msaa_flags_offset >= push_start);
+      wm_prog_data->msaa_flags_param =
+         (fs_msaa_flags_offset - push_start) / 4;
+   }
+
    /* Now that we're done computing the push constant portion of the
     * bind map, hash it. This lets us quickly determine if the actual
     * mapping has changed and not just a no-op pipeline change.
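To make the msaa_flags_param arithmetic above concrete, a worked example with illustrative offsets (not the real anv_push_constants layout):

   /* Illustrative only: if the pushed range starts at byte 64 of
    * anv_push_constants and fs.msaa_flags sits at byte 72, then
    *    msaa_flags_param = (72 - 64) / 4 = 2,
    * i.e. the dynamic MSAA flags land in the third 32-bit slot of the
    * pushed range, which is the slot the compiled FS reads at draw time. */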

@@ -316,6 +316,10 @@ anv_check_for_primitive_replication(struct anv_device *device,
    if (stages & ~(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT))
       return false;

+   /* It's possible we have no vertex shader yet (with pipeline libraries) */
+   if (!(stages & VK_SHADER_STAGE_VERTEX_BIT))
+      return false;
+
    int view_count = util_bitcount(view_mask);
    if (view_count == 1 || view_count > primitive_replication_max_views)
       return false;

File diff suppressed because it is too large.

@@ -2437,6 +2437,15 @@ struct anv_push_constants {
    */
    uint64_t desc_sets[MAX_SETS];

+   union {
+      struct {
+         /** Dynamic MSAA value */
+         uint32_t msaa_flags;
+
+         /** Pad out to a multiple of 32 bytes */
+         uint32_t pad[1];
+      } fs;
+
       struct {
          /** Base workgroup ID
           *
@@ -2452,6 +2461,7 @@ struct anv_push_constants {
          uint32_t subgroup_id;
       } cs;
    };
+};

 struct anv_surface_state {
    struct anv_state state;
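The fs.pad[1] slot exists for the "multiple of 32 bytes" comment above: push constants are uploaded in 32-byte units, so the struct size must stay a multiple of 32. A hypothetical compile-time check of that invariant, not part of the commit:

   /* C11 static_assert; assumes the 32-byte push-constant granularity. */
   static_assert(sizeof(struct anv_push_constants) % 32 == 0,
                 "anv_push_constants must be a multiple of 32 bytes");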
@@ -3121,10 +3131,12 @@ anv_shader_bin_create(struct anv_device *device,
                       const struct anv_pipeline_bind_map *bind_map,
                       const struct anv_push_descriptor_info *push_desc_info);

-static inline void
+static inline struct anv_shader_bin *
 anv_shader_bin_ref(struct anv_shader_bin *shader)
 {
    vk_pipeline_cache_object_ref(&shader->base);
+   return shader;
 }

 static inline void
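Returning the shader from anv_shader_bin_ref turns ref-and-assign into a one-liner, which is handy when a linked pipeline imports binaries from a library; a hypothetical sketch of that pattern (the loop and field names are illustrative, not lifted from the commit):

   for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
      if (lib->base.shaders[s] != NULL)
         pipeline->shaders[s] = anv_shader_bin_ref(lib->base.shaders[s]);
   }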
@@ -3144,10 +3156,17 @@ struct anv_pipeline_executable {
 enum anv_pipeline_type {
    ANV_PIPELINE_GRAPHICS,
+   ANV_PIPELINE_GRAPHICS_LIB,
    ANV_PIPELINE_COMPUTE,
    ANV_PIPELINE_RAY_TRACING,
 };

+#define ALL_GRAPHICS_LIB_FLAGS \
+   (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT | \
+    VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | \
+    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | \
+    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)
+
 struct anv_pipeline {
    struct vk_object_base base;
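ALL_GRAPHICS_LIB_FLAGS above is the union of the four library parts defined by the extension, so a pipeline whose accumulated flags cover the whole mask has every part needed to be executable. A hypothetical helper along those lines (illustrative, not from the commit):

   static inline bool
   gpl_parts_complete(VkGraphicsPipelineLibraryFlagsEXT flags)
   {
      /* All four parts present: the pipeline can be fully linked. */
      return (flags & ALL_GRAPHICS_LIB_FLAGS) == ALL_GRAPHICS_LIB_FLAGS;
   }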
@@ -3185,10 +3204,48 @@ struct anv_pipeline {
 struct anv_graphics_base_pipeline {
    struct anv_pipeline base;

+   struct vk_sample_locations_state sample_locations;
+
    /* Shaders */
    struct anv_shader_bin * shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];

    VkShaderStageFlags active_stages;
+
+   /* True if at the time the fragment shader was compiled, it didn't have all
+    * the information to avoid BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC.
+    */
+   bool fragment_dynamic;
+};
+
+/* The library graphics pipeline object has a partial graphic state and
+ * possibly some shaders. If requested, shaders are also present in NIR early
+ * form.
+ */
+struct anv_graphics_lib_pipeline {
+   struct anv_graphics_base_pipeline base;
+
+   VkGraphicsPipelineLibraryFlagsEXT lib_flags;
+
+   struct vk_graphics_pipeline_all_state all_state;
+   struct vk_graphics_pipeline_state state;
+
+   /* Retained shaders for link optimization. */
+   struct {
+      /* This hash is the same as computed in
+       * anv_graphics_pipeline_gather_shaders().
+       */
+      unsigned char shader_sha1[20];
+
+      enum gl_subgroup_size subgroup_size_type;
+
+      /* NIR captured in anv_pipeline_stage_get_nir(), includes specialization
+       * constants.
+       */
+      nir_shader * nir;
+   } retained_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
+
+   /* Whether the shaders have been retained */
+   bool retain_shaders;
 };

 /* The final graphics pipeline object has all the graphics state ready to be
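retain_shaders is driven by the application: keeping the early NIR lets a later link recompile across stages instead of reusing the library binaries as-is. A minimal sketch of the application-side flags (spec-defined names, continuing the earlier hypothetical example):

   /* On each library: keep the info needed for link-time optimization. */
   lib_create.flags = VK_PIPELINE_CREATE_LIBRARY_BIT_KHR |
                      VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;

   /* On the link: request a cross-stage optimized compile rather than a
    * fast link. */
   link_create.flags = VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT;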
@@ -3238,6 +3295,8 @@ struct anv_graphics_pipeline {
    uint32_t vertex_input_elems;
    uint32_t vertex_input_data[96];

+   enum brw_wm_msaa_flags fs_msaa_flags;
+
    /* Pre computed CS instructions that can directly be copied into
     * anv_cmd_buffer.
     */
@@ -3305,6 +3364,7 @@ struct anv_ray_tracing_pipeline {
 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
 ANV_DECL_PIPELINE_DOWNCAST(graphics_base, ANV_PIPELINE_GRAPHICS)
+ANV_DECL_PIPELINE_DOWNCAST(graphics_lib, ANV_PIPELINE_GRAPHICS_LIB)
 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
 ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
@@ -3315,6 +3375,13 @@ anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
    return (pipeline->base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
 }

+static inline bool
+anv_pipeline_base_has_stage(const struct anv_graphics_base_pipeline *pipeline,
+                            gl_shader_stage stage)
+{
+   return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
+}
+
 static inline bool
 anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
 {

@@ -1497,7 +1497,8 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
          wm_prog_data->uses_kill;

       wm.BarycentricInterpolationMode =
-         wm_prog_data_barycentric_modes(wm_prog_data, 0);
+         wm_prog_data_barycentric_modes(wm_prog_data,
+                                        pipeline->fs_msaa_flags);
    }

    GENX(3DSTATE_WM_pack)(NULL, pipeline->gfx8.wm, &wm);
@@ -1525,7 +1526,10 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
    anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
       intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                   ms != NULL ? ms->rasterization_samples : 1,
-                                  0 /* msaa_flags */);
+                                  pipeline->fs_msaa_flags);
+
+      const bool persample =
+         brw_wm_prog_data_is_persample(wm_prog_data, pipeline->fs_msaa_flags);

       ps.KernelStartPointer0 = fs_bin->kernel.offset +
                                brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
@@ -1541,8 +1545,9 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
       ps.BindingTableEntryCount = fs_bin->bind_map.surface_count;
       ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
                               wm_prog_data->base.ubo_ranges[0].length;
-      ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
-                                  POSOFFSET_SAMPLE : POSOFFSET_NONE;
+      ps.PositionXYOffsetSelect =
+         !wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
+         persample ? POSOFFSET_SAMPLE : POSOFFSET_CENTROID;

       ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
@@ -1582,7 +1587,7 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
       ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
       ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
       ps.PixelShaderIsPerSample =
-         brw_wm_prog_data_is_persample(wm_prog_data, 0);
+         brw_wm_prog_data_is_persample(wm_prog_data, pipeline->fs_msaa_flags);
       ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
       ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
       ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
@@ -1614,14 +1619,14 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
       ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
          wm_prog_data->uses_depth_w_coefficients;
       ps.PixelShaderIsPerCoarsePixel =
-         brw_wm_prog_data_is_coarse(wm_prog_data, 0);
+         brw_wm_prog_data_is_coarse(wm_prog_data, pipeline->fs_msaa_flags);
 #endif
 #if GFX_VERx10 >= 125
       /* TODO: We should only require this when the last geometry shader uses
        * a fragment shading rate that is not constant.
        */
       ps.EnablePSDependencyOnCPsizeChange =
-         brw_wm_prog_data_is_coarse(wm_prog_data, 0);
+         brw_wm_prog_data_is_coarse(wm_prog_data, pipeline->fs_msaa_flags);
 #endif
    }
 }

@@ -350,7 +350,7 @@ genX(emit_shading_rate)(struct anv_batch *batch,
 {
    const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
    const bool cps_enable = wm_prog_data &&
-      brw_wm_prog_data_is_coarse(wm_prog_data, 0);
+      brw_wm_prog_data_is_coarse(wm_prog_data, pipeline->fs_msaa_flags);

 #if GFX_VER == 11
    anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {
@@ -463,7 +463,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
 #if GFX_VER >= 11
    if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate &&
-       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
+       (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE ||
+        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR)))
       genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, &dyn->fsr);
 #endif /* GFX_VER >= 11 */