anv: implement VK_EXT_graphics_pipeline_library
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15637>
committed by Marge Bot
parent 0b8a2de2a1
commit 3d49cdb71e
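For context, a minimal application-side sketch of what this extension enables: building partial pipeline libraries and then linking them into an executable pipeline. This is illustrative only; the helper names are assumptions, error handling is omitted, and the original pNext chains are dropped for brevity. None of it comes from this commit.

#include <vulkan/vulkan.h>

/* Create a library pipeline providing only the fragment-shader state subset. */
static VkPipeline
create_fs_library(VkDevice device, const VkGraphicsPipelineCreateInfo *fs_state)
{
   VkGraphicsPipelineLibraryCreateInfoEXT lib_info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT,
      .flags = VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT,
   };
   VkGraphicsPipelineCreateInfo info = *fs_state;
   info.pNext = &lib_info;   /* original chain dropped for brevity */
   info.flags |= VK_PIPELINE_CREATE_LIBRARY_BIT_KHR;

   VkPipeline lib = VK_NULL_HANDLE;
   vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &info, NULL, &lib);
   return lib;
}

/* Link previously built libraries into an executable graphics pipeline. */
static VkPipeline
link_libraries(VkDevice device, uint32_t count, const VkPipeline *libs)
{
   VkPipelineLibraryCreateInfoKHR link_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,
      .libraryCount = count,
      .pLibraries = libs,
   };
   VkGraphicsPipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .pNext = &link_info,
   };
   VkPipeline pipeline = VK_NULL_HANDLE;
   vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &info, NULL, &pipeline);
   return pipeline;
}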
@@ -63,3 +63,14 @@ dEQP-VK.dynamic_rendering.suballocation.load_store_op_none.stencil_d32_sfloat_s8
 
 # New CTS failures in 1.3.5.0
 dEQP-VK.drm_format_modifiers.export_import_fmt_features2.b4g4r4a4_unorm_pack16,Crash
+
+# Waiting on clarification from https://gitlab.khronos.org/vulkan/vulkan/-/issues/3115
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_geometry_stage_fragment_stage_delayed_destroy,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_fragment_stage,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_tessellation_control_stage_tessellation_evaluation_stage_fragment_stage_no_cache,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_geometry_stage_fragment_stage,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_tessellation_control_stage_tessellation_evaluation_stage_fragment_stage_delayed_destroy,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_fragment_stage_delayed_destroy,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_geometry_stage_fragment_stage_no_cache,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_fragment_stage_no_cache,Crash
+dEQP-VK.pipeline.pipeline_library.creation_feedback.graphics_tests.vertex_stage_tessellation_control_stage_tessellation_evaluation_stage_fragment_stage,Crash
@@ -431,6 +431,12 @@ void anv_CmdBindPipeline(
          if (modified)
             cmd_buffer->state.push_constants_dirty |= stages;
       }
+
+      if ((gfx_pipeline->fs_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) &&
+          push->fs.msaa_flags != gfx_pipeline->fs_msaa_flags) {
+         push->fs.msaa_flags = gfx_pipeline->fs_msaa_flags;
+         cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
+      }
       break;
    }
 
@@ -308,6 +308,7 @@ get_device_extensions(const struct anv_physical_device *device,
                                     VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
       .EXT_global_priority_query = device->max_context_priority >=
                                    VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
+      .EXT_graphics_pipeline_library = true,
       .EXT_host_query_reset = true,
       .EXT_image_2d_view_of_3d = true,
       .EXT_image_robustness = true,
@@ -1383,6 +1384,7 @@ void anv_GetPhysicalDeviceFeatures2(
 
       /* VK_EXT_global_priority_query */
      .globalPriorityQuery = true,
+     .graphicsPipelineLibrary = true,
 
      /* VK_KHR_fragment_shading_rate */
      .pipelineFragmentShadingRate = true,
@@ -2193,6 +2195,14 @@ void anv_GetPhysicalDeviceProperties2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT: {
+         VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT *props =
+            (VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT *)ext;
+         props->graphicsPipelineLibraryFastLinking = true;
+         props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
          VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
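As a hedged usage sketch (not part of this change), the two properties exposed above are read through the usual vkGetPhysicalDeviceProperties2 chain; the physical_device handle is assumed:

VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT gpl_props = {
   .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT,
};
VkPhysicalDeviceProperties2 props2 = {
   .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
   .pNext = &gpl_props,
};
vkGetPhysicalDeviceProperties2(physical_device, &props2);

/* With this commit both fields are VK_TRUE: libraries can be linked without
 * VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT at low cost, and the
 * fragment stage's interpolation decorations need not match the last
 * pre-rasterization stage. */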
@@ -74,6 +74,7 @@ void anv_nir_apply_pipeline_layout(nir_shader *shader,
 void anv_nir_compute_push_layout(nir_shader *nir,
                                  const struct anv_physical_device *pdevice,
                                  bool robust_buffer_access,
+                                 bool fragment_dynamic,
                                  struct brw_stage_prog_data *prog_data,
                                  struct anv_pipeline_bind_map *map,
                                  void *mem_ctx);
@@ -32,6 +32,7 @@ void
 anv_nir_compute_push_layout(nir_shader *nir,
                             const struct anv_physical_device *pdevice,
                             bool robust_buffer_access,
+                            bool fragment_dynamic,
                             struct brw_stage_prog_data *prog_data,
                             struct anv_pipeline_bind_map *map,
                             void *mem_ctx)
@@ -102,6 +103,14 @@ anv_nir_compute_push_layout(nir_shader *nir,
       push_end = MAX2(push_end, push_reg_mask_end);
    }
 
+   if (nir->info.stage == MESA_SHADER_FRAGMENT && fragment_dynamic) {
+      const uint32_t fs_msaa_flags_start =
+         offsetof(struct anv_push_constants, fs.msaa_flags);
+      const uint32_t fs_msaa_flags_end = fs_msaa_flags_start + sizeof(uint32_t);
+      push_start = MIN2(push_start, fs_msaa_flags_start);
+      push_end = MAX2(push_end, fs_msaa_flags_end);
+   }
+
    if (nir->info.stage == MESA_SHADER_COMPUTE && devinfo->verx10 < 125) {
       /* For compute shaders, we always have to have the subgroup ID. The
        * back-end compiler will "helpfully" add it for us in the last push
@@ -275,6 +284,17 @@ anv_nir_compute_push_layout(nir_shader *nir,
       map->push_ranges[0] = push_constant_range;
    }
 
+   if (nir->info.stage == MESA_SHADER_FRAGMENT && fragment_dynamic) {
+      struct brw_wm_prog_data *wm_prog_data =
+         container_of(prog_data, struct brw_wm_prog_data, base);
+
+      const uint32_t fs_msaa_flags_offset =
+         offsetof(struct anv_push_constants, fs.msaa_flags);
+      assert(fs_msaa_flags_offset >= push_start);
+      wm_prog_data->msaa_flags_param =
+         (fs_msaa_flags_offset - push_start) / 4;
+   }
+
    /* Now that we're done computing the push constant portion of the
     * bind map, hash it. This lets us quickly determine if the actual
     * mapping has changed and not just a no-op pipeline change.
@@ -316,6 +316,10 @@ anv_check_for_primitive_replication(struct anv_device *device,
    if (stages & ~(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT))
       return false;
 
+   /* It's possible we have no vertex shader yet (with pipeline libraries) */
+   if (!(stages & VK_SHADER_STAGE_VERTEX_BIT))
+      return false;
+
    int view_count = util_bitcount(view_mask);
    if (view_count == 1 || view_count > primitive_replication_max_views)
       return false;
(File diff suppressed because it is too large.)
@@ -2437,20 +2437,30 @@ struct anv_push_constants {
     */
    uint64_t desc_sets[MAX_SETS];
 
-   struct {
-      /** Base workgroup ID
-       *
-       * Used for vkCmdDispatchBase.
-       */
-      uint32_t base_work_group_id[3];
-
-      /** Subgroup ID
-       *
-       * This is never set by software but is implicitly filled out when
-       * uploading the push constants for compute shaders.
-       */
-      uint32_t subgroup_id;
-   } cs;
+   union {
+      struct {
+         /** Dynamic MSAA value */
+         uint32_t msaa_flags;
+
+         /** Pad out to a multiple of 32 bytes */
+         uint32_t pad[1];
+      } fs;
+
+      struct {
+         /** Base workgroup ID
+          *
+          * Used for vkCmdDispatchBase.
+          */
+         uint32_t base_work_group_id[3];
+
+         /** Subgroup ID
+          *
+          * This is never set by software but is implicitly filled out when
+          * uploading the push constants for compute shaders.
+          */
+         uint32_t subgroup_id;
+      } cs;
+   };
 };
 
 struct anv_surface_state {
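As an illustration of the new union layout (an assumption-labelled sketch, not code from the commit): a given pipeline bind point only ever pushes one of the two sub-structs, so fs and cs can alias the same bytes. A build-time check in that spirit, assuming anv_private.h is included:

#include <stddef.h>
#include <assert.h>

/* First members of the two union arms necessarily share an offset. */
static_assert(offsetof(struct anv_push_constants, fs.msaa_flags) ==
              offsetof(struct anv_push_constants, cs.base_work_group_id),
              "fs and cs push constants alias the same storage");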
@@ -3121,10 +3131,12 @@ anv_shader_bin_create(struct anv_device *device,
                       const struct anv_pipeline_bind_map *bind_map,
                       const struct anv_push_descriptor_info *push_desc_info);
 
-static inline void
+static inline struct anv_shader_bin *
 anv_shader_bin_ref(struct anv_shader_bin *shader)
 {
    vk_pipeline_cache_object_ref(&shader->base);
+
+   return shader;
 }
 
 static inline void
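A brief note on the signature change: returning the shader lets a caller take a reference and store the pointer in one expression, for instance when a linked pipeline imports binaries from a library. A hypothetical sketch with assumed variable names (lib, new_pipeline):

for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
   if (lib->base.shaders[s] != NULL)
      new_pipeline->base.shaders[s] = anv_shader_bin_ref(lib->base.shaders[s]);
}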
@@ -3144,10 +3156,17 @@ struct anv_pipeline_executable {
 
 enum anv_pipeline_type {
    ANV_PIPELINE_GRAPHICS,
+   ANV_PIPELINE_GRAPHICS_LIB,
    ANV_PIPELINE_COMPUTE,
    ANV_PIPELINE_RAY_TRACING,
 };
 
+#define ALL_GRAPHICS_LIB_FLAGS \
+   (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT | \
+    VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | \
+    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | \
+    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)
+
 struct anv_pipeline {
    struct vk_object_base base;
 
@@ -3185,10 +3204,48 @@ struct anv_pipeline {
struct anv_graphics_base_pipeline {
   struct anv_pipeline base;

   struct vk_sample_locations_state sample_locations;

   /* Shaders */
   struct anv_shader_bin * shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];

   VkShaderStageFlags active_stages;

   /* True if at the time the fragment shader was compiled, it didn't have all
    * the information to avoid BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC.
    */
   bool fragment_dynamic;
};

/* The library graphics pipeline object has a partial graphic state and
 * possibly some shaders. If requested, shaders are also present in NIR early
 * form.
 */
struct anv_graphics_lib_pipeline {
   struct anv_graphics_base_pipeline base;

   VkGraphicsPipelineLibraryFlagsEXT lib_flags;

   struct vk_graphics_pipeline_all_state all_state;
   struct vk_graphics_pipeline_state state;

   /* Retained shaders for link optimization. */
   struct {
      /* This hash is the same as computed in
       * anv_graphics_pipeline_gather_shaders().
       */
      unsigned char shader_sha1[20];

      enum gl_subgroup_size subgroup_size_type;

      /* NIR captured in anv_pipeline_stage_get_nir(), includes specialization
       * constants.
       */
      nir_shader * nir;
   } retained_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];

   /* Whether the shaders have been retained */
   bool retain_shaders;
};

/* The final graphics pipeline object has all the graphics state ready to be
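For context on the retained_shaders/retain_shaders fields above, a hedged application-side sketch (the flag names are core VK_KHR_pipeline_library / VK_EXT_graphics_pipeline_library API; the lib_create_info variable is assumed): NIR is only worth keeping when the application signals that it may relink the library with link-time optimization.

/* Ask the driver to retain whatever it needs (here, early NIR) so that a
 * later link with VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT can
 * recompile the shaders with full cross-stage information. */
lib_create_info.flags |= VK_PIPELINE_CREATE_LIBRARY_BIT_KHR |
                         VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;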
@@ -3238,6 +3295,8 @@ struct anv_graphics_pipeline {
    uint32_t vertex_input_elems;
    uint32_t vertex_input_data[96];
 
+   enum brw_wm_msaa_flags fs_msaa_flags;
+
    /* Pre computed CS instructions that can directly be copied into
     * anv_cmd_buffer.
     */
@@ -3305,6 +3364,7 @@ struct anv_ray_tracing_pipeline {
 
 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
 ANV_DECL_PIPELINE_DOWNCAST(graphics_base, ANV_PIPELINE_GRAPHICS)
+ANV_DECL_PIPELINE_DOWNCAST(graphics_lib, ANV_PIPELINE_GRAPHICS_LIB)
 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
 ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
 
@@ -3315,6 +3375,13 @@ anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
    return (pipeline->base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
 }
 
+static inline bool
+anv_pipeline_base_has_stage(const struct anv_graphics_base_pipeline *pipeline,
+                            gl_shader_stage stage)
+{
+   return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
+}
+
 static inline bool
 anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
 {
@@ -1497,7 +1497,8 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
                            wm_prog_data->uses_kill;
 
       wm.BarycentricInterpolationMode =
-         wm_prog_data_barycentric_modes(wm_prog_data, 0);
+         wm_prog_data_barycentric_modes(wm_prog_data,
+                                        pipeline->fs_msaa_flags);
    }
 
    GENX(3DSTATE_WM_pack)(NULL, pipeline->gfx8.wm, &wm);
@@ -1525,7 +1526,10 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
    anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
       intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                   ms != NULL ? ms->rasterization_samples : 1,
-                                  0 /* msaa_flags */);
+                                  pipeline->fs_msaa_flags);
+
+      const bool persample =
+         brw_wm_prog_data_is_persample(wm_prog_data, pipeline->fs_msaa_flags);
 
       ps.KernelStartPointer0 = fs_bin->kernel.offset +
                                brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
@@ -1541,8 +1545,9 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
       ps.BindingTableEntryCount = fs_bin->bind_map.surface_count;
       ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
                               wm_prog_data->base.ubo_ranges[0].length;
-      ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
-                                  POSOFFSET_SAMPLE: POSOFFSET_NONE;
+      ps.PositionXYOffsetSelect =
+         !wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
+         persample ? POSOFFSET_SAMPLE : POSOFFSET_CENTROID;
 
       ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
 
@@ -1582,7 +1587,7 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
       ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
       ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
       ps.PixelShaderIsPerSample =
-         brw_wm_prog_data_is_persample(wm_prog_data, 0);
+         brw_wm_prog_data_is_persample(wm_prog_data, pipeline->fs_msaa_flags);
       ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
       ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
       ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
@@ -1614,14 +1619,14 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
       ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
          wm_prog_data->uses_depth_w_coefficients;
       ps.PixelShaderIsPerCoarsePixel =
-         brw_wm_prog_data_is_coarse(wm_prog_data, 0);
+         brw_wm_prog_data_is_coarse(wm_prog_data, pipeline->fs_msaa_flags);
 #endif
 #if GFX_VERx10 >= 125
       /* TODO: We should only require this when the last geometry shader uses
        * a fragment shading rate that is not constant.
        */
       ps.EnablePSDependencyOnCPsizeChange =
-         brw_wm_prog_data_is_coarse(wm_prog_data, 0);
+         brw_wm_prog_data_is_coarse(wm_prog_data, pipeline->fs_msaa_flags);
 #endif
    }
 }
@@ -350,7 +350,7 @@ genX(emit_shading_rate)(struct anv_batch *batch,
 {
    const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
    const bool cps_enable = wm_prog_data &&
-      brw_wm_prog_data_is_coarse(wm_prog_data, 0);
+      brw_wm_prog_data_is_coarse(wm_prog_data, pipeline->fs_msaa_flags);
 
 #if GFX_VER == 11
    anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {
@@ -463,7 +463,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
 
 #if GFX_VER >= 11
    if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate &&
-       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
+       (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE ||
+        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR)))
       genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, &dyn->fsr);
 #endif /* GFX_VER >= 11 */
 