anv: implement VK_EXT_graphics_pipeline_library

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15637>
Author: Lionel Landwerlin
Date: 2022-03-28 15:42:27 +03:00
Committed by: Marge Bot
Commit: 3d49cdb71e (parent 0b8a2de2a1)
10 changed files with 920 additions and 175 deletions


@@ -63,3 +63,14 @@ dEQP-VK.dynamic_rendering.suballocation.load_store_op_none.stencil_d32_sfloat_s8
# New CTS failures in 1.3.5.0
dEQP-VK.drm_format_modifiers.export_import_fmt_features2.b4g4r4a4_unorm_pack16,Crash
# Waiting on clarification from https://gitlab.khronos.org/vulkan/vulkan/-/issues/3115
dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_geometry_stage_fragment_stage_delayed_destroy,Crash
dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_fragment_stage,Crash
dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_tessellation_control_stage_tessellation_evaluation_stage_fragment_stage_no_cache,Crash
dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_geometry_stage_fragment_stage,Crash
dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_tessellation_control_stage_tessellation_evaluation_stage_fragment_stage_delayed_destroy,Crash
dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_fragment_stage_delayed_destroy,Crash
dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_geometry_stage_fragment_stage_no_cache,Crash
dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_fragment_stage_no_cache,Crash
dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_tessellation_control_stage_tessellation_evaluation_stage_fragment_stage,Crash


@@ -431,6 +431,12 @@ void anv_CmdBindPipeline(
if (modified)
cmd_buffer->state.push_constants_dirty |= stages;
}
if ((gfx_pipeline->fs_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) &&
push->fs.msaa_flags != gfx_pipeline->fs_msaa_flags) {
push->fs.msaa_flags = gfx_pipeline->fs_msaa_flags;
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
}
break;
}
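
With graphics pipeline libraries the fragment shader can be compiled before the multisample state is known, in which case the compiler falls back to BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC and the real flags reach the shader through push constants when the pipeline is bound, as in the hunk above. A minimal standalone sketch of that gating pattern (the flag bit, struct and names are hypothetical stand-ins, not the driver's):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins; only the gating pattern mirrors the hunk above. */
#define MSAA_FLAG_ENABLE_DYNAMIC (1u << 0)

struct push_state {
   uint32_t fs_msaa_flags;   /* value the fragment shader reads at run time */
   bool dirty;               /* whether push constants need re-uploading */
};

static void bind_pipeline(struct push_state *push, uint32_t pipeline_msaa_flags)
{
   /* Only touch the push constant if the shader was compiled to read the
    * flags dynamically, and only mark it dirty when the value changes. */
   if ((pipeline_msaa_flags & MSAA_FLAG_ENABLE_DYNAMIC) &&
       push->fs_msaa_flags != pipeline_msaa_flags) {
      push->fs_msaa_flags = pipeline_msaa_flags;
      push->dirty = true;
   }
}

int main(void)
{
   struct push_state push = {0};
   bind_pipeline(&push, MSAA_FLAG_ENABLE_DYNAMIC | (1u << 1));
   printf("flags=0x%x dirty=%d\n", (unsigned)push.fs_msaa_flags, push.dirty);
   return 0;
}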


@@ -308,6 +308,7 @@ get_device_extensions(const struct anv_physical_device *device,
VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
.EXT_global_priority_query = device->max_context_priority >=
VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
.EXT_graphics_pipeline_library = true,
.EXT_host_query_reset = true,
.EXT_image_2d_view_of_3d = true,
.EXT_image_robustness = true,
@@ -1383,6 +1384,7 @@ void anv_GetPhysicalDeviceFeatures2(
/* VK_EXT_global_priority_query */
.globalPriorityQuery = true,
.graphicsPipelineLibrary = true,
/* VK_KHR_fragment_shading_rate */
.pipelineFragmentShadingRate = true,
@@ -2193,6 +2195,14 @@ void anv_GetPhysicalDeviceProperties2(
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT: {
VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT *props =
(VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT *)ext;
props->graphicsPipelineLibraryFastLinking = true;
props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
(VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
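
The feature and property bits above are what applications see when probing the extension. A sketch of the application-side query using the standard Vulkan 1.1 vkGetPhysicalDeviceFeatures2()/vkGetPhysicalDeviceProperties2() entry points ('pdev' is assumed to be a valid VkPhysicalDevice obtained elsewhere):

#include <vulkan/vulkan.h>
#include <stdio.h>

void query_gpl_support(VkPhysicalDevice pdev)
{
   VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT gpl_features = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT,
   };
   VkPhysicalDeviceFeatures2 features2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
      .pNext = &gpl_features,
   };
   vkGetPhysicalDeviceFeatures2(pdev, &features2);

   VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT gpl_props = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT,
   };
   VkPhysicalDeviceProperties2 props2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
      .pNext = &gpl_props,
   };
   vkGetPhysicalDeviceProperties2(pdev, &props2);

   printf("graphicsPipelineLibrary=%u fastLinking=%u independentInterpolation=%u\n",
          (unsigned)gpl_features.graphicsPipelineLibrary,
          (unsigned)gpl_props.graphicsPipelineLibraryFastLinking,
          (unsigned)gpl_props.graphicsPipelineLibraryIndependentInterpolationDecoration);
}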


@@ -74,6 +74,7 @@ void anv_nir_apply_pipeline_layout(nir_shader *shader,
void anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
bool robust_buffer_access,
bool fragment_dynamic,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
void *mem_ctx);


@@ -32,6 +32,7 @@ void
anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
bool robust_buffer_access,
bool fragment_dynamic,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
void *mem_ctx)
@@ -102,6 +103,14 @@ anv_nir_compute_push_layout(nir_shader *nir,
push_end = MAX2(push_end, push_reg_mask_end);
}
if (nir->info.stage == MESA_SHADER_FRAGMENT && fragment_dynamic) {
const uint32_t fs_msaa_flags_start =
offsetof(struct anv_push_constants, fs.msaa_flags);
const uint32_t fs_msaa_flags_end = fs_msaa_flags_start + sizeof(uint32_t);
push_start = MIN2(push_start, fs_msaa_flags_start);
push_end = MAX2(push_end, fs_msaa_flags_end);
}
if (nir->info.stage == MESA_SHADER_COMPUTE && devinfo->verx10 < 125) {
/* For compute shaders, we always have to have the subgroup ID. The
* back-end compiler will "helpfully" add it for us in the last push
@@ -275,6 +284,17 @@ anv_nir_compute_push_layout(nir_shader *nir,
map->push_ranges[0] = push_constant_range;
}
if (nir->info.stage == MESA_SHADER_FRAGMENT && fragment_dynamic) {
struct brw_wm_prog_data *wm_prog_data =
container_of(prog_data, struct brw_wm_prog_data, base);
const uint32_t fs_msaa_flags_offset =
offsetof(struct anv_push_constants, fs.msaa_flags);
assert(fs_msaa_flags_offset >= push_start);
wm_prog_data->msaa_flags_param =
(fs_msaa_flags_offset - push_start) / 4;
}
/* Now that we're done computing the push constant portion of the
* bind map, hash it. This lets us quickly determine if the actual
* mapping has changed and not just a no-op pipeline change.
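
The new fragment_dynamic path above reserves the fs.msaa_flags dword inside the push constant range and then records its dword index relative to the start of the range, which is what the backend consumes as msaa_flags_param. A standalone sketch of that offset arithmetic, using a hypothetical, much simplified push constant struct:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical, much simplified push constant block; only the arithmetic
 * mirrors anv_nir_compute_push_layout(). */
struct fake_push_constants {
   uint32_t client_data[4];
   struct {
      uint32_t msaa_flags;
   } fs;
};

int main(void)
{
   /* Pretend the shader only needs the last client dword, so the push
    * range starts at client_data[3] and fs.msaa_flags extends it. */
   uint32_t push_start   = offsetof(struct fake_push_constants, client_data[3]);
   uint32_t flags_offset = offsetof(struct fake_push_constants, fs.msaa_flags);
   uint32_t push_end     = flags_offset + sizeof(uint32_t);

   /* The backend consumes a dword index relative to the range start. */
   uint32_t msaa_flags_param = (flags_offset - push_start) / 4;
   printf("range=[%u,%u) msaa_flags_param=%u\n",
          (unsigned)push_start, (unsigned)push_end, (unsigned)msaa_flags_param);
   return 0;
}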


@@ -316,6 +316,10 @@ anv_check_for_primitive_replication(struct anv_device *device,
if (stages & ~(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT))
return false;
/* It's possible we have no vertex shader yet (with pipeline libraries) */
if (!(stages & VK_SHADER_STAGE_VERTEX_BIT))
return false;
int view_count = util_bitcount(view_mask);
if (view_count == 1 || view_count > primitive_replication_max_views)
return false;
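
Primitive replication needs a vertex shader to replicate (which a pipeline library may not have yet) and only pays off for a small number of views. A tiny sketch of the view-mask counting, using __builtin_popcount where the driver uses util_bitcount, with a made-up view limit:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Made-up limit; the real maximum lives in the driver. */
#define MAX_REPLICATED_VIEWS 2

static bool views_allow_replication(uint32_t view_mask)
{
   /* util_bitcount() in Mesa; __builtin_popcount() here. */
   int view_count = __builtin_popcount(view_mask);
   return view_count > 1 && view_count <= MAX_REPLICATED_VIEWS;
}

int main(void)
{
   printf("%d %d %d\n",
          views_allow_replication(0x1),  /* 1 view  -> no  */
          views_allow_replication(0x5),  /* 2 views -> yes */
          views_allow_replication(0x7)); /* 3 views -> no  */
   return 0;
}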

(File diff suppressed because it is too large.)


@@ -2437,20 +2437,30 @@ struct anv_push_constants {
*/
uint64_t desc_sets[MAX_SETS];
-struct {
-/** Base workgroup ID
-*
-* Used for vkCmdDispatchBase.
-*/
-uint32_t base_work_group_id[3];
-
-/** Subgroup ID
-*
-* This is never set by software but is implicitly filled out when
-* uploading the push constants for compute shaders.
-*/
-uint32_t subgroup_id;
-} cs;
+union {
+struct {
+/** Dynamic MSAA value */
+uint32_t msaa_flags;
+
+/** Pad out to a multiple of 32 bytes */
+uint32_t pad[1];
+} fs;
+
+struct {
+/** Base workgroup ID
+*
+* Used for vkCmdDispatchBase.
+*/
+uint32_t base_work_group_id[3];
+
+/** Subgroup ID
+*
+* This is never set by software but is implicitly filled out when
+* uploading the push constants for compute shaders.
+*/
+uint32_t subgroup_id;
+} cs;
+};
};
struct anv_surface_state {
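
Because fragment and compute shaders never share a pipeline, the union above lets fs.msaa_flags alias the start of the compute block instead of growing the push constant space. A standalone sketch with a simplified mirror of that layout (not the real anv_push_constants) to show the aliasing:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified mirror of the union added above, not the real driver struct. */
union stage_push {
   struct {
      uint32_t msaa_flags;
      uint32_t pad[1];
   } fs;
   struct {
      uint32_t base_work_group_id[3];
      uint32_t subgroup_id;
   } cs;
};

int main(void)
{
   /* Both members start at offset 0; a given pipeline only ever uses one
    * interpretation of the storage. */
   printf("fs.msaa_flags@%zu cs.base_work_group_id@%zu size=%zu\n",
          offsetof(union stage_push, fs.msaa_flags),
          offsetof(union stage_push, cs.base_work_group_id),
          sizeof(union stage_push));
   return 0;
}
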
@@ -3121,10 +3131,12 @@ anv_shader_bin_create(struct anv_device *device,
const struct anv_pipeline_bind_map *bind_map,
const struct anv_push_descriptor_info *push_desc_info);
-static inline void
+static inline struct anv_shader_bin *
anv_shader_bin_ref(struct anv_shader_bin *shader)
{
vk_pipeline_cache_object_ref(&shader->base);
+return shader;
}
static inline void
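
Returning the shader from anv_shader_bin_ref() lets callers take a reference and assign it in a single expression, which is convenient when the link step copies shader binaries from a library into the final pipeline. A generic sketch of that ref-and-return pattern with a hypothetical refcounted object:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical refcounted object; only the ref-and-return shape matters. */
struct shader {
   uint32_t refcount;
};

static inline struct shader *shader_ref(struct shader *s)
{
   s->refcount++;
   return s;   /* returning the argument enables "dst = shader_ref(src);" */
}

int main(void)
{
   struct shader lib_shader = { .refcount = 1 };
   struct shader *pipeline_shader = shader_ref(&lib_shader);
   printf("refcount=%u same=%d\n",
          (unsigned)lib_shader.refcount, pipeline_shader == &lib_shader);
   return 0;
}
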
@@ -3144,10 +3156,17 @@ struct anv_pipeline_executable {
enum anv_pipeline_type {
ANV_PIPELINE_GRAPHICS,
ANV_PIPELINE_GRAPHICS_LIB,
ANV_PIPELINE_COMPUTE,
ANV_PIPELINE_RAY_TRACING,
};
#define ALL_GRAPHICS_LIB_FLAGS \
(VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT | \
VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | \
VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | \
VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)
struct anv_pipeline {
struct vk_object_base base;
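
ANV_PIPELINE_GRAPHICS_LIB and ALL_GRAPHICS_LIB_FLAGS back the new library pipeline type. On the API side, a library is just a graphics pipeline created with VK_PIPELINE_CREATE_LIBRARY_BIT_KHR plus a VkGraphicsPipelineLibraryCreateInfoEXT naming which of the four state subsets it provides. A hedged sketch for a fragment-shader-only library ('device', 'cache', 'layout' and 'fs_stage' are assumed valid; the remaining state structs and all error handling are omitted):

#include <vulkan/vulkan.h>
#include <stddef.h>

/* Sketch: build a fragment-shader-only pipeline library. */
VkPipeline create_fragment_library(VkDevice device, VkPipelineCache cache,
                                   VkPipelineLayout layout,
                                   const VkPipelineShaderStageCreateInfo *fs_stage)
{
   VkGraphicsPipelineLibraryCreateInfoEXT lib_info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT,
      .flags = VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT,
   };
   VkGraphicsPipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .pNext = &lib_info,
      .flags = VK_PIPELINE_CREATE_LIBRARY_BIT_KHR,
      .stageCount = 1,
      .pStages = fs_stage,
      .layout = layout,
   };
   VkPipeline lib = VK_NULL_HANDLE;
   vkCreateGraphicsPipelines(device, cache, 1, &info, NULL, &lib);
   return lib;
}
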
@@ -3185,10 +3204,48 @@ struct anv_pipeline {
struct anv_graphics_base_pipeline {
struct anv_pipeline base;
struct vk_sample_locations_state sample_locations;
/* Shaders */
struct anv_shader_bin * shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
VkShaderStageFlags active_stages;
/* True if at the time the fragment shader was compiled, it didn't have all
* the information to avoid BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC.
*/
bool fragment_dynamic;
};
/* The library graphics pipeline object has a partial graphics state and
* possibly some shaders. If requested, the shaders are also kept in their
* early NIR form.
*/
struct anv_graphics_lib_pipeline {
struct anv_graphics_base_pipeline base;
VkGraphicsPipelineLibraryFlagsEXT lib_flags;
struct vk_graphics_pipeline_all_state all_state;
struct vk_graphics_pipeline_state state;
/* Retained shaders for link optimization. */
struct {
/* This hash is the same as computed in
* anv_graphics_pipeline_gather_shaders().
*/
unsigned char shader_sha1[20];
enum gl_subgroup_size subgroup_size_type;
/* NIR captured in anv_pipeline_stage_get_nir(), includes specialization
* constants.
*/
nir_shader * nir;
} retained_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
/* Whether the shaders have been retained */
bool retain_shaders;
};
/* The final graphics pipeline object has all the graphics state ready to be
@@ -3238,6 +3295,8 @@ struct anv_graphics_pipeline {
uint32_t vertex_input_elems;
uint32_t vertex_input_data[96];
enum brw_wm_msaa_flags fs_msaa_flags;
/* Precomputed CS instructions that can directly be copied into
* anv_cmd_buffer.
*/
@@ -3305,6 +3364,7 @@ struct anv_ray_tracing_pipeline {
ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
ANV_DECL_PIPELINE_DOWNCAST(graphics_base, ANV_PIPELINE_GRAPHICS)
ANV_DECL_PIPELINE_DOWNCAST(graphics_lib, ANV_PIPELINE_GRAPHICS_LIB)
ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
@@ -3315,6 +3375,13 @@ anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
return (pipeline->base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
}
static inline bool
anv_pipeline_base_has_stage(const struct anv_graphics_base_pipeline *pipeline,
gl_shader_stage stage)
{
return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
}
static inline bool
anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
{
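
The retained_shaders/retain_shaders fields added above let a library keep its shaders in NIR form (per the spec, when it is created with VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT) so they can be recompiled together when the final pipeline requests VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT; otherwise the already-compiled binaries can simply be reused for a fast link. A sketch of the application-side link step (library handles and layout assumed valid, error handling omitted):

#include <vulkan/vulkan.h>
#include <stddef.h>

/* Sketch: link previously created library pipelines into an executable
 * pipeline, optionally requesting link-time optimization. */
VkPipeline link_graphics_pipeline(VkDevice device, VkPipelineCache cache,
                                  VkPipelineLayout layout,
                                  const VkPipeline libs[4], VkBool32 optimize)
{
   VkPipelineLibraryCreateInfoKHR link_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,
      .libraryCount = 4,
      .pLibraries = libs,
   };
   VkGraphicsPipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .pNext = &link_info,
      .flags = optimize ? VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT : 0,
      .layout = layout,
   };
   VkPipeline pipeline = VK_NULL_HANDLE;
   vkCreateGraphicsPipelines(device, cache, 1, &info, NULL, &pipeline);
   return pipeline;
}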


@@ -1497,7 +1497,8 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
wm_prog_data->uses_kill;
wm.BarycentricInterpolationMode =
-wm_prog_data_barycentric_modes(wm_prog_data, 0);
+wm_prog_data_barycentric_modes(wm_prog_data,
+pipeline->fs_msaa_flags);
}
GENX(3DSTATE_WM_pack)(NULL, pipeline->gfx8.wm, &wm);
@@ -1525,7 +1526,10 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
ms != NULL ? ms->rasterization_samples : 1,
-0 /* msaa_flags */);
+pipeline->fs_msaa_flags);
+
+const bool persample =
+brw_wm_prog_data_is_persample(wm_prog_data, pipeline->fs_msaa_flags);
ps.KernelStartPointer0 = fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
@@ -1541,8 +1545,9 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
ps.BindingTableEntryCount = fs_bin->bind_map.surface_count;
ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
wm_prog_data->base.ubo_ranges[0].length;
-ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
-POSOFFSET_SAMPLE: POSOFFSET_NONE;
+ps.PositionXYOffsetSelect =
+!wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
+persample ? POSOFFSET_SAMPLE : POSOFFSET_CENTROID;
ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
@@ -1582,7 +1587,7 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
ps.PixelShaderIsPerSample =
-brw_wm_prog_data_is_persample(wm_prog_data, 0);
+brw_wm_prog_data_is_persample(wm_prog_data, pipeline->fs_msaa_flags);
ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
@@ -1614,14 +1619,14 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
wm_prog_data->uses_depth_w_coefficients;
ps.PixelShaderIsPerCoarsePixel =
-brw_wm_prog_data_is_coarse(wm_prog_data, 0);
+brw_wm_prog_data_is_coarse(wm_prog_data, pipeline->fs_msaa_flags);
#endif
#if GFX_VERx10 >= 125
/* TODO: We should only require this when the last geometry shader uses
* a fragment shading rate that is not constant.
*/
ps.EnablePSDependencyOnCPsizeChange =
-brw_wm_prog_data_is_coarse(wm_prog_data, 0);
+brw_wm_prog_data_is_coarse(wm_prog_data, pipeline->fs_msaa_flags);
#endif
}
}
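
Every 3DSTATE_PS/PS_EXTRA/WM field above that used to pass a literal 0 now passes the pipeline's fs_msaa_flags, so one fragment shader binary can be programmed per-pixel or per-sample depending on the state it is linked against. A generic sketch of the kind of "baked-in bit or dynamic flag" decision involved (names are hypothetical, not the actual brw helpers):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical flag bits; only the selection pattern is the point here. */
#define MSAA_FLAG_ENABLE_DYNAMIC (1u << 0)
#define MSAA_FLAG_PERSAMPLE      (1u << 1)

struct fake_wm_prog_data {
   bool persample_dispatch;   /* compile-time decision, when it was known */
};

static bool is_persample(const struct fake_wm_prog_data *prog_data,
                         uint32_t msaa_flags)
{
   /* A shader compiled for dynamic MSAA defers to the flags chosen at
    * pipeline-build time; otherwise the baked-in decision stands. */
   if (msaa_flags & MSAA_FLAG_ENABLE_DYNAMIC)
      return (msaa_flags & MSAA_FLAG_PERSAMPLE) != 0;
   return prog_data->persample_dispatch;
}

int main(void)
{
   struct fake_wm_prog_data pd = { .persample_dispatch = false };
   printf("%d %d\n",
          is_persample(&pd, 0),
          is_persample(&pd, MSAA_FLAG_ENABLE_DYNAMIC | MSAA_FLAG_PERSAMPLE));
   return 0;
}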


@@ -350,7 +350,7 @@ genX(emit_shading_rate)(struct anv_batch *batch,
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
const bool cps_enable = wm_prog_data &&
-brw_wm_prog_data_is_coarse(wm_prog_data, 0);
+brw_wm_prog_data_is_coarse(wm_prog_data, pipeline->fs_msaa_flags);
#if GFX_VER == 11
anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {
@@ -463,7 +463,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
#if GFX_VER >= 11
if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate &&
-BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
+(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE ||
+BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR)))
genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, &dyn->fsr);
#endif /* GFX_VER >= 11 */
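
Since the coarse-pixel enable now depends on the bound pipeline's fs_msaa_flags, the shading-rate state has to be re-emitted when the pipeline changes, not only when the dynamic fragment shading rate changes, which is what the extra ANV_CMD_DIRTY_PIPELINE test above does. A minimal sketch of that dirty-bit pattern with made-up flags:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Made-up dirty bits; only the "pipeline OR dynamic state" test mirrors
 * the hunk above. */
#define DIRTY_PIPELINE (1u << 0)
#define DIRTY_FSR      (1u << 1)

static int emit_count;

static void emit_shading_rate(void)
{
   emit_count++;
}

static void flush_dynamic_state(uint32_t dirty, bool fsr_extension_enabled)
{
   /* Re-emit when either the pipeline changed (its fs_msaa_flags may differ)
    * or the fragment-shading-rate dynamic state changed. */
   if (fsr_extension_enabled && (dirty & (DIRTY_PIPELINE | DIRTY_FSR)))
      emit_shading_rate();
}

int main(void)
{
   flush_dynamic_state(DIRTY_PIPELINE, true);   /* pipeline switch */
   flush_dynamic_state(DIRTY_FSR, true);        /* dynamic shading rate change */
   flush_dynamic_state(0, true);                /* nothing changed */
   printf("emitted %d times\n", emit_count);    /* 2 */
   return 0;
}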