intel/isl: Add and use multi-engine surf usage bits

Add and use two new surf usage bits:

* ISL_SURF_USAGE_MULTI_ENGINE_SEQ_BIT: the surface may be accessed by
  multiple engines, but not in parallel.

* ISL_SURF_USAGE_MULTI_ENGINE_PAR_BIT: the surface may be accessed by
  multiple engines in parallel.

Neither usage is concerned with read-after-read access patterns.

Using these bits allows ISL to conditionally use Tile64 or a 64KB
alignment to account for the gfx12.5 CCS WA from HSD 22015614752. Apart
from the potential space savings, there are three benefits of this
approach:

1) CCS can now be used with miptails (though nothing makes use of this
   today).

2) CCS can now be used with 3D depth/stencil surfaces in GL.

3) CCS can now be used with 3D depth/stencil surfaces in Vulkan when
   apps only use a single queue.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11111
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11117
Tested-by: Mark Janes <markjanes@swizzler.org>
Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29094>
This commit is contained in:
Nanley Chery
2024-04-15 11:09:01 -04:00
committed by Marge Bot
parent 3053268fd0
commit 6b969a4b43
4 changed files with 35 additions and 1 deletions

View File

@@ -788,6 +788,23 @@ iris_resource_configure_main(const struct iris_screen *screen,
ISL_SURF_USAGE_STENCIL_BIT : ISL_SURF_USAGE_DEPTH_BIT;
}
if ((usage & ISL_SURF_USAGE_TEXTURE_BIT) ||
!isl_surf_usage_is_depth_or_stencil(usage)) {
/* Notify ISL that iris may access this image from different engines.
* The reads and writes performed by the engines are guaranteed to be
* sequential with respect to each other. This is due to the
* implementation of flush_for_cross_batch_dependencies().
*/
usage |= ISL_SURF_USAGE_MULTI_ENGINE_SEQ_BIT;
} else {
/* Depth/stencil render buffers are the only surfaces which are not
* accessed by compute shaders. Also, iris does not use the blitter on
* such surfaces.
*/
assert(!(templ->bind & PIPE_BIND_SHADER_IMAGE));
assert(!(templ->bind & PIPE_BIND_PRIME_BLIT_DST));
}
const enum isl_format format =
iris_format_for_usage(screen->devinfo, templ->format, usage).fmt;

View File

@@ -1118,6 +1118,7 @@ isl_surf_choose_tiling(const struct isl_device *dev,
if (intel_needs_workaround(dev->info, 22015614752) &&
_isl_surf_info_supports_ccs(dev, info->format, info->usage) &&
(info->usage & ISL_SURF_USAGE_MULTI_ENGINE_PAR_BIT) &&
(info->levels > 1 || info->depth > 1 || info->array_len > 1)) {
/* There are issues with multiple engines accessing the same CCS
* cacheline in parallel. This can happen if this image has multiple
@@ -1724,6 +1725,7 @@ isl_choose_miptail_start_level(const struct isl_device *dev,
return 15;
if (intel_needs_workaround(dev->info, 22015614752) &&
(info->usage & ISL_SURF_USAGE_MULTI_ENGINE_PAR_BIT) &&
_isl_surf_info_supports_ccs(dev, info->format, info->usage)) {
/* There are issues with multiple engines accessing the same CCS
* cacheline in parallel. If we're here, Tile64 is in use, providing enough
@@ -2711,7 +2713,9 @@ isl_calc_base_alignment(const struct isl_device *dev,
* It is expressed in terms of number of 256B block of CCS, where
* each 256B block of CCS corresponds to 64KB of main surface."
*/
if (intel_needs_workaround(dev->info, 22015614752)) {
if (intel_needs_workaround(dev->info, 22015614752) &&
(info->usage & (ISL_SURF_USAGE_MULTI_ENGINE_SEQ_BIT |
ISL_SURF_USAGE_MULTI_ENGINE_PAR_BIT))) {
base_alignment_B = MAX(base_alignment_B,
256 /* cacheline */ * 256 /* AUX ratio */);
}
@@ -3094,6 +3098,7 @@ isl_surf_supports_ccs(const struct isl_device *dev,
* in all cases. So, we choose to disable CCS.
*/
if (intel_needs_workaround(dev->info, 22015614752) &&
(surf->usage & ISL_SURF_USAGE_MULTI_ENGINE_PAR_BIT) &&
surf->dim == ISL_SURF_DIM_3D) {
assert(surf->tiling == ISL_TILING_4);
return false;
@@ -3114,6 +3119,7 @@ isl_surf_supports_ccs(const struct isl_device *dev,
* in all cases. So, we choose to disable CCS.
*/
if (intel_needs_workaround(dev->info, 22015614752) &&
(surf->usage & ISL_SURF_USAGE_MULTI_ENGINE_PAR_BIT) &&
surf->dim == ISL_SURF_DIM_3D) {
assert(surf->tiling == ISL_TILING_4);
return false;
@@ -3149,6 +3155,7 @@ isl_surf_supports_ccs(const struct isl_device *dev,
return false;
if (intel_needs_workaround(dev->info, 22015614752) &&
(surf->usage & ISL_SURF_USAGE_MULTI_ENGINE_PAR_BIT) &&
(surf->levels > 1 ||
surf->logical_level0_px.depth > 1 ||
surf->logical_level0_px.array_len > 1)) {

View File

@@ -1152,6 +1152,8 @@ typedef uint64_t isl_surf_usage_flags_t;
#define ISL_SURF_USAGE_NO_AUX_TT_ALIGNMENT_BIT (1u << 21)
#define ISL_SURF_USAGE_BLITTER_DST_BIT (1u << 22)
#define ISL_SURF_USAGE_BLITTER_SRC_BIT (1u << 23)
#define ISL_SURF_USAGE_MULTI_ENGINE_SEQ_BIT (1u << 24)
#define ISL_SURF_USAGE_MULTI_ENGINE_PAR_BIT (1u << 25)
/** @} */
/**

View File

@@ -1705,6 +1705,14 @@ anv_image_init(struct anv_device *device, struct anv_image *image,
image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
isl_extra_usage_flags |= ISL_SURF_USAGE_DISABLE_AUX_BIT;
if (device->queue_count > 1) {
/* Notify ISL that the app may access this image from different engines.
* Note that parallel access to the surface will occur regardless of the
* sharing mode.
*/
isl_extra_usage_flags |= ISL_SURF_USAGE_MULTI_ENGINE_PAR_BIT;
}
const isl_tiling_flags_t isl_tiling_flags =
choose_isl_tiling_flags(device->info, create_info, isl_mod_info,
image->vk.wsi_legacy_scanout);