anv: add VK_EXT_host_image_copy support

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Nanley Chery <nanley.g.chery@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24276>
Lionel Landwerlin
2023-06-12 13:19:14 +03:00
committed by Marge Bot
parent 3beb269721
commit 0317c44872
7 changed files with 731 additions and 25 deletions

View File

@@ -725,6 +725,7 @@ KHR-GL46.sparse_texture2_tests.SparseTexture2Lookup_texture_2d_multisample_r16ui
KHR-GL46.sparse_texture2_tests.SparseTexture2Lookup_texture_2d_multisample_array_r16i,Fail
KHR-GL46.sparse_texture2_tests.SparseTexture2Lookup_texture_2d_multisample_rg8i,Fail
KHR-GL46.sparse_texture2_tests.SparseTexture2Commitment_texture_2d_multisample_r32i,Fail
KHR-GL46.sparse_texture2_tests.SparseTexture2Lookup_texture_2d_multisample_array_r8ui,Fail
KHR-GL46.sparse_texture2_tests.SparseTexture2Lookup_texture_2d_multisample_array_r8i,Fail
KHR-GL46.sparse_texture2_tests.SparseTexture2Lookup_texture_2d_multisample_array_rg32i,Fail
KHR-GL46.sparse_texture2_tests.SparseTexture2Lookup_texture_2d_multisample_rg16,Fail

View File

@@ -580,7 +580,8 @@ anv_get_image_format_features2(const struct anv_physical_device *physical_device
VK_FORMAT_FEATURE_2_BLIT_SRC_BIT |
VK_FORMAT_FEATURE_2_BLIT_DST_BIT |
VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
-VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
+VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT |
+VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT;
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
flags |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT |
@@ -633,7 +634,8 @@ anv_get_image_format_features2(const struct anv_physical_device *physical_device
VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
flags |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
-VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
+VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_MINMAX_BIT |
+VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT;
if (isl_format_supports_filtering(devinfo, plane_format.isl_format))
flags |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
@@ -840,6 +842,11 @@ anv_get_image_format_features2(const struct anv_physical_device *physical_device
*/
flags &= ~VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT;
flags &= ~VK_FORMAT_FEATURE_2_STORAGE_IMAGE_ATOMIC_BIT;
/* Host transfers don't touch the AUX data, so if that is required by
* the modifier, just drop host transfer support for the format.
*/
flags &= ~VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT;
}
if (isl_mod_info->supports_clear_color && plane_format.isl_format !=
@@ -1321,6 +1328,7 @@ anv_get_image_format_properties(
VkAndroidHardwareBufferUsageANDROID *android_usage = NULL;
VkTextureLODGatherFormatPropertiesAMD *texture_lod_gather_props = NULL;
VkImageCompressionPropertiesEXT *comp_props = NULL;
VkHostImageCopyDevicePerformanceQueryEXT *host_props = NULL;
bool from_wsi = false;
/* Extract input structs */
@@ -1371,6 +1379,9 @@ anv_get_image_format_properties(
case VK_STRUCTURE_TYPE_IMAGE_COMPRESSION_PROPERTIES_EXT:
comp_props = (void *) s;
break;
case VK_STRUCTURE_TYPE_HOST_IMAGE_COPY_DEVICE_PERFORMANCE_QUERY_EXT:
host_props = (void *) s;
break;
default:
vk_debug_ignored_stype(s->sType);
break;
@@ -1380,6 +1391,11 @@ anv_get_image_format_properties(
if (format == NULL)
goto unsupported;
/* Would require some annoying tracking or kernel support. */
if ((info->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) &&
(info->usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT))
goto unsupported;
if (info->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
isl_mod_info = isl_drm_modifier_get_info(modifier_info->drmFormatModifier);
if (isl_mod_info == NULL)
@@ -1795,18 +1811,48 @@ anv_get_image_format_properties(
}
}
+const bool aux_supported =
+vk_format_has_depth(info->format) ||
+isl_format_supports_ccs_d(devinfo, format->planes[0].isl_format) ||
+anv_formats_ccs_e_compatible(devinfo, info->flags, info->format,
+info->tiling, info->usage,
+format_list_info);
if (comp_props) {
-bool ccs_supported =
-anv_formats_ccs_e_compatible(devinfo, info->flags, info->format,
-info->tiling, info->usage,
-format_list_info);
comp_props->imageCompressionFixedRateFlags =
VK_IMAGE_COMPRESSION_FIXED_RATE_NONE_EXT;
-comp_props->imageCompressionFlags = ccs_supported ?
+comp_props->imageCompressionFlags = aux_supported ?
VK_IMAGE_COMPRESSION_DEFAULT_EXT :
VK_IMAGE_COMPRESSION_DISABLED_EXT;
}
if (host_props) {
const bool compressed_format =
isl_format_is_compressed(format->planes[0].isl_format);
/* We're required to return optimalDeviceAccess for compressed formats:
*
* "If VkPhysicalDeviceImageFormatInfo2::format is a block-compressed
* format and vkGetPhysicalDeviceImageFormatProperties2 returns
* VK_SUCCESS, the implementation must return VK_TRUE in
* optimalDeviceAccess."
*
* When compression is not supported, the size of the image will not
* change to support host image transfers.
*
* TODO: We might be able to still allocate the compression data so that
* we can report identicalMemoryLayout=true, but we might still
* have to report optimalDeviceAccess=false to signal potential
* perf loss.
*/
if (compressed_format || !aux_supported) {
host_props->optimalDeviceAccess = true;
host_props->identicalMemoryLayout = true;
} else {
host_props->optimalDeviceAccess = false;
host_props->identicalMemoryLayout = false;
}
}
return VK_SUCCESS;
unsupported:
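
For context, the host_props values filled in above surface to applications through vkGetPhysicalDeviceImageFormatProperties2. A minimal app-side sketch of that query — the helper name and the chosen usage flags are illustrative assumptions, not part of this patch:

#include <stdbool.h>
#include <vulkan/vulkan.h>

/* Hypothetical helper: returns whether host-copying to/from an image of
 * this format keeps device access optimal (i.e. the driver did not have to
 * drop compression for it). Assumes VK_EXT_host_image_copy is enabled. */
static bool
host_copy_is_optimal(VkPhysicalDevice pdev, VkFormat format)
{
   VkHostImageCopyDevicePerformanceQueryEXT perf = {
      .sType = VK_STRUCTURE_TYPE_HOST_IMAGE_COPY_DEVICE_PERFORMANCE_QUERY_EXT,
   };
   VkImageFormatProperties2 props = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
      .pNext = &perf,
   };
   const VkPhysicalDeviceImageFormatInfo2 info = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
      .format = format,
      .type = VK_IMAGE_TYPE_2D,
      .tiling = VK_IMAGE_TILING_OPTIMAL,
      .usage = VK_IMAGE_USAGE_SAMPLED_BIT |
               VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT,
   };
   if (vkGetPhysicalDeviceImageFormatProperties2(pdev, &info, &props) !=
       VK_SUCCESS)
      return false;
   return perf.optimalDeviceAccess == VK_TRUE;
}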

View File

@@ -247,6 +247,12 @@ anv_image_choose_isl_surf_usage(struct anv_physical_device *device,
vk_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR)
isl_usage |= ISL_SURF_USAGE_VIDEO_DECODE_BIT;
/* We disable aux surfaces for host read/write images so that we can update
* the main surface without caring about the auxiliary surface.
*/
if (vk_usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT)
isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT;
if (vk_create_flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
isl_usage |= ISL_SURF_USAGE_CUBE_BIT;
@@ -1866,7 +1872,7 @@ anv_image_finish(struct anv_image *image)
anv_device_unmap_bo(device,
image->bindings[b].address.bo,
image->bindings[b].host_map,
-image->bindings[b].memory_range.size,
+image->bindings[b].map_size,
false /* replace */);
}
}
@@ -2083,20 +2089,30 @@ anv_image_is_pat_compressible(struct anv_device *device, struct anv_image *image
* For images created with a color format, the memoryTypeBits member is
* identical for all VkImage objects created with the same combination
* of values for the tiling member, the
-* VK_IMAGE_CREATE_SPARSE_BINDING_BIT bit of the flags member, the
+* VK_IMAGE_CREATE_SPARSE_BINDING_BIT bit and
+* VK_IMAGE_CREATE_PROTECTED_BIT bit of the flags member, the
* VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT bit of the flags
-* member, handleTypes member of VkExternalMemoryImageCreateInfo, and
-* the VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT of the usage member in
-* the VkImageCreateInfo structure passed to vkCreateImage.
+* member, the VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT bit of the usage
+* member if the
+* VkPhysicalDeviceHostImageCopyPropertiesEXT::identicalMemoryTypeRequirements
+* property is VK_FALSE, handleTypes member of
+* VkExternalMemoryImageCreateInfo, and the
+* VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT of the usage member in the
+* VkImageCreateInfo structure passed to vkCreateImage.
*
* For images created with a depth/stencil format, the memoryTypeBits
* member is identical for all VkImage objects created with the same
* combination of values for the format member, the tiling member, the
-* VK_IMAGE_CREATE_SPARSE_BINDING_BIT bit of the flags member, the
+* VK_IMAGE_CREATE_SPARSE_BINDING_BIT bit and
+* VK_IMAGE_CREATE_PROTECTED_BIT bit of the flags member, the
* VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT bit of the flags
-* member, handleTypes member of VkExternalMemoryImageCreateInfo, and
-* the VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT of the usage member in
-* the VkImageCreateInfo structure passed to vkCreateImage.
+* member, the VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT bit of the usage
+* member if the
+* VkPhysicalDeviceHostImageCopyPropertiesEXT::identicalMemoryTypeRequirements
+* property is VK_FALSE, handleTypes member of
+* VkExternalMemoryImageCreateInfo, and the
+* VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT of the usage member in the
+* VkImageCreateInfo structure passed to vkCreateImage.
*/
/* There are no compression-enabled modifiers on Xe2, and all legacy
@@ -2503,18 +2519,28 @@ anv_image_bind_address(struct anv_device *device,
image->bindings[binding].address = address;
/* Map bindings for images with host transfer usage, so that we don't have
-* to map/unmap things at every host operation.
+* to map/unmap things at every host operation. We map cached, which means
+* the copy operations need to clflush on platforms that have no
+* host_cache+host_coherent memory types.
*/
if (image->vk.usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT) {
uint64_t offset = image->bindings[binding].address.offset +
image->bindings[binding].memory_range.offset;
uint64_t map_offset, map_size;
anv_sanitize_map_params(device, offset,
image->bindings[binding].memory_range.size,
&map_offset, &map_size);
VkResult result = anv_device_map_bo(device,
image->bindings[binding].address.bo,
-image->bindings[binding].address.offset +
-image->bindings[binding].memory_range.offset,
-image->bindings[binding].memory_range.size,
+map_offset, map_size,
NULL /* placed_addr */,
&image->bindings[binding].host_map);
if (result != VK_SUCCESS)
return result;
image->bindings[binding].map_delta = (offset - map_offset);
image->bindings[binding].map_size = map_size;
}
ANV_RMV(image_bind, device, image, binding);
@@ -2708,7 +2734,8 @@ VkResult anv_BindImageMemory2(
}
static void
-anv_get_image_subresource_layout(const struct anv_image *image,
+anv_get_image_subresource_layout(struct anv_device *device,
+const struct anv_image *image,
const VkImageSubresource2KHR *subresource,
VkSubresourceLayout2KHR *layout)
{
@@ -2804,11 +2831,11 @@ anv_get_image_subresource_layout(const struct anv_image *image,
row_pitch_B = isl_surf->row_pitch_B;
}
+const uint32_t level = subresource->imageSubresource.mipLevel;
if (isl_surf) {
/* ISL tries to give us a single layer but the Vulkan API expects the
* entire 3D size.
*/
-const uint32_t level = subresource->imageSubresource.mipLevel;
const uint32_t layer = subresource->imageSubresource.arrayLayer;
const uint32_t z = u_minify(isl_surf->logical_level0_px.d, level) - 1;
uint64_t z0_start_tile_B, z0_end_tile_B;
@@ -2834,6 +2861,37 @@ anv_get_image_subresource_layout(const struct anv_image *image,
layout->subresourceLayout.arrayPitch = 0;
}
VkSubresourceHostMemcpySizeEXT *host_memcpy_size =
vk_find_struct(layout->pNext, SUBRESOURCE_HOST_MEMCPY_SIZE_EXT);
if (host_memcpy_size) {
if (!isl_surf) {
host_memcpy_size->size = 0;
} else if (anv_image_can_host_memcpy(image)) {
host_memcpy_size->size = layout->subresourceLayout.size;
} else {
/* If we cannot do a straight memcpy of the image, compute a linear
* size. That is the layout in which we store the host data.
*/
struct isl_surf lin_surf;
bool ok =
isl_surf_init(&device->physical->isl_dev, &lin_surf,
.dim = isl_surf->dim,
.format = isl_surf->format,
.width = u_minify(
isl_surf->logical_level0_px.w, level),
.height = u_minify(
isl_surf->logical_level0_px.h, level),
.depth = u_minify(
isl_surf->logical_level0_px.d, level),
.array_len = 1,
.levels = 1,
.samples = isl_surf->samples,
.tiling_flags = ISL_TILING_LINEAR_BIT);
assert(ok);
host_memcpy_size->size = lin_surf.size_B;
}
}
VkImageCompressionPropertiesEXT *comp_props =
vk_find_struct(layout->pNext, IMAGE_COMPRESSION_PROPERTIES_EXT);
if (comp_props) {
@@ -2864,18 +2922,19 @@ void anv_GetDeviceImageSubresourceLayoutKHR(
return;
}
-anv_get_image_subresource_layout(&image, pInfo->pSubresource, pLayout);
+anv_get_image_subresource_layout(device, &image, pInfo->pSubresource, pLayout);
}
void anv_GetImageSubresourceLayout2KHR(
-VkDevice device,
+VkDevice _device,
VkImage _image,
const VkImageSubresource2KHR* pSubresource,
VkSubresourceLayout2KHR* pLayout)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_image, image, _image);
-anv_get_image_subresource_layout(image, pSubresource, pLayout);
+anv_get_image_subresource_layout(device, image, pSubresource, pLayout);
}
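
The VkSubresourceHostMemcpySizeEXT handling above is what sizes an application's staging allocation when it passes VK_HOST_IMAGE_COPY_MEMCPY_EXT and copies the raw subresource layout. A rough app-side sketch (hypothetical helper; queries mip 0, layer 0):

#include <vulkan/vulkan.h>

/* Hypothetical helper: bytes needed to hold one subresource when copying
 * with VK_HOST_IMAGE_COPY_MEMCPY_EXT. */
static VkDeviceSize
query_host_memcpy_size(VkDevice dev, VkImage image, VkImageAspectFlags aspect)
{
   VkSubresourceHostMemcpySizeEXT memcpy_size = {
      .sType = VK_STRUCTURE_TYPE_SUBRESOURCE_HOST_MEMCPY_SIZE_EXT,
   };
   VkSubresourceLayout2KHR layout = {
      .sType = VK_STRUCTURE_TYPE_SUBRESOURCE_LAYOUT_2_KHR,
      .pNext = &memcpy_size,
   };
   const VkImageSubresource2KHR subresource = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_SUBRESOURCE_2_KHR,
      .imageSubresource = {
         .aspectMask = aspect,
         .mipLevel = 0,
         .arrayLayer = 0,
      },
   };
   vkGetImageSubresourceLayout2KHR(dev, image, &subresource, &layout);
   return memcpy_size.size;
}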
static VkImageUsageFlags

View File

@@ -0,0 +1,506 @@
/* Copyright © 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include <assert.h>
#include <stdbool.h>
#include "anv_private.h"
#include "util/u_cpu_detect.h"
#include "util/u_debug.h"
#include "vk_util.h"
static inline VkOffset3D
vk_offset3d_to_el(enum isl_format format, VkOffset3D offset)
{
const struct isl_format_layout *fmt_layout =
isl_format_get_layout(format);
return (VkOffset3D) {
.x = offset.x / fmt_layout->bw,
.y = offset.y / fmt_layout->bh,
.z = offset.z / fmt_layout->bd,
};
}
static inline VkExtent3D
vk_extent3d_to_el(enum isl_format format, VkExtent3D extent)
{
const struct isl_format_layout *fmt_layout =
isl_format_get_layout(format);
return (VkExtent3D) {
.width = DIV_ROUND_UP(extent.width, fmt_layout->bw),
.height = DIV_ROUND_UP(extent.height, fmt_layout->bh),
.depth = DIV_ROUND_UP(extent.depth, fmt_layout->bd),
};
}
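These two helpers convert API pixel coordinates into block-element coordinates. A worked example, assuming a BC1 surface (4x4x1 texel blocks: bw=4, bh=4, bd=1); it relies only on the includes at the top of this file:

static void
example_bc1_block_conversion(void)
{
   /* Pixel offset (8, 4, 0) lands on block-element (2, 1, 0). */
   const VkOffset3D off =
      vk_offset3d_to_el(ISL_FORMAT_BC1_UNORM,
                        (VkOffset3D) { .x = 8, .y = 4, .z = 0 });
   assert(off.x == 2 && off.y == 1 && off.z == 0);

   /* A 100x62x1 pixel extent covers 25x16x1 blocks; DIV_ROUND_UP handles
    * the partial block at the bottom edge (62/4 rounds up to 16). */
   const VkExtent3D ext =
      vk_extent3d_to_el(ISL_FORMAT_BC1_UNORM,
                        (VkExtent3D) { 100, 62, 1 });
   assert(ext.width == 25 && ext.height == 16 && ext.depth == 1);
}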
static void
anv_memcpy_image_memory(struct anv_device *device,
const struct isl_surf *surf,
const struct anv_image_binding *binding,
uint64_t binding_offset,
void *mem_ptr,
uint32_t level,
uint32_t base_img_array_layer,
uint32_t base_img_z_offset_px,
uint32_t array_layer,
uint32_t z_offset_px,
bool mem_to_img)
{
uint64_t start_tile_B, end_tile_B;
isl_surf_get_image_range_B_tile(surf, level,
base_img_array_layer,
base_img_z_offset_px,
&start_tile_B, &end_tile_B);
uint32_t array_pitch_B = isl_surf_get_array_pitch(surf);
uint32_t img_depth_or_layer = MAX2(base_img_array_layer + array_layer,
base_img_z_offset_px + z_offset_px);
uint32_t mem_depth_or_layer = MAX2(z_offset_px, array_layer);
void *img_ptr = binding->host_map + binding->map_delta + binding_offset;
if (mem_to_img) {
memcpy(img_ptr + start_tile_B + img_depth_or_layer * array_pitch_B,
mem_ptr + mem_depth_or_layer * array_pitch_B,
end_tile_B - start_tile_B);
} else {
memcpy(mem_ptr + mem_depth_or_layer * array_pitch_B,
img_ptr + start_tile_B + img_depth_or_layer * array_pitch_B,
end_tile_B - start_tile_B);
}
}
static void
get_image_offset_el(const struct isl_surf *surf, unsigned level, unsigned z,
uint32_t *out_x0_el, uint32_t *out_y0_el)
{
ASSERTED uint32_t z0_el, a0_el;
if (surf->dim == ISL_SURF_DIM_3D) {
isl_surf_get_image_offset_el(surf, level, 0, z,
out_x0_el, out_y0_el, &z0_el, &a0_el);
} else {
isl_surf_get_image_offset_el(surf, level, z, 0,
out_x0_el, out_y0_el, &z0_el, &a0_el);
}
assert(z0_el == 0 && a0_el == 0);
}
/* Compute extent parameters for use with tiled_memcpy functions.
* xs are in units of bytes and ys are in units of strides.
*/
static inline void
tile_extents(const struct isl_surf *surf,
const VkOffset3D *offset_el,
const VkExtent3D *extent_el,
unsigned level, int z,
uint32_t *x1_B, uint32_t *x2_B,
uint32_t *y1_el, uint32_t *y2_el)
{
const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
const unsigned cpp = fmtl->bpb / 8;
/* z contains offset->z */
assert (z >= offset_el->z);
unsigned x0_el, y0_el;
get_image_offset_el(surf, level, z, &x0_el, &y0_el);
*x1_B = (offset_el->x + x0_el) * cpp;
*y1_el = offset_el->y + y0_el;
*x2_B = (offset_el->x + extent_el->width + x0_el) * cpp;
*y2_el = offset_el->y + extent_el->height + y0_el;
}
static void
anv_copy_image_memory(struct anv_device *device,
const struct isl_surf *surf,
const struct anv_image_binding *binding,
uint64_t binding_offset,
void *mem_ptr,
uint64_t mem_row_pitch_B,
uint64_t mem_height_pitch_B,
const VkOffset3D *offset_el,
const VkExtent3D *extent_el,
uint32_t level,
uint32_t base_img_array_layer,
uint32_t base_img_z_offset_px,
uint32_t array_layer,
uint32_t z_offset_px,
bool mem_to_img)
{
const struct isl_format_layout *fmt_layout =
isl_format_get_layout(surf->format);
const uint32_t bs = fmt_layout->bpb / 8;
void *img_ptr = binding->host_map + binding->map_delta + binding_offset;
uint64_t start_tile_B, end_tile_B;
isl_surf_get_image_range_B_tile(surf, level,
base_img_array_layer + array_layer,
base_img_z_offset_px + z_offset_px,
&start_tile_B, &end_tile_B);
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
const bool need_invalidate_flush =
(binding->address.bo->flags & ANV_BO_ALLOC_HOST_COHERENT) == 0 &&
device->physical->memory.need_flush;
if (need_invalidate_flush && !mem_to_img)
intel_invalidate_range(img_ptr + start_tile_B, end_tile_B - start_tile_B);
#endif
uint32_t img_depth_or_layer = MAX2(base_img_array_layer + array_layer,
base_img_z_offset_px + z_offset_px);
uint32_t mem_depth_or_layer = MAX2(z_offset_px, array_layer);
if (surf->tiling == ISL_TILING_LINEAR) {
uint64_t img_col_offset = offset_el->x * bs;
uint64_t row_copy_size = extent_el->width * bs;
for (uint32_t h_el = 0; h_el < extent_el->height; h_el++) {
uint64_t mem_row_offset =
mem_height_pitch_B * mem_depth_or_layer +
h_el * mem_row_pitch_B;
uint64_t img_row = h_el + offset_el->y;
uint64_t img_offset =
start_tile_B + img_row * surf->row_pitch_B + img_col_offset;
assert((img_offset + row_copy_size) <= binding->memory_range.size);
if (mem_to_img)
memcpy(img_ptr + img_offset, mem_ptr + mem_row_offset, row_copy_size);
else
memcpy(mem_ptr + mem_row_offset, img_ptr + img_offset, row_copy_size);
}
} else {
uint32_t x1, x2, y1, y2;
tile_extents(surf, offset_el, extent_el, level, img_depth_or_layer,
&x1, &x2, &y1, &y2);
if (mem_to_img) {
isl_memcpy_linear_to_tiled(x1, x2, y1, y2,
img_ptr,
mem_ptr + mem_height_pitch_B * mem_depth_or_layer,
surf->row_pitch_B,
mem_row_pitch_B,
false,
surf->tiling,
ISL_MEMCPY);
} else {
isl_memcpy_tiled_to_linear(x1, x2, y1, y2,
mem_ptr + mem_height_pitch_B * mem_depth_or_layer,
img_ptr,
mem_row_pitch_B,
surf->row_pitch_B,
false,
surf->tiling,
#if defined(USE_SSE41)
util_get_cpu_caps()->has_sse4_1 ?
ISL_MEMCPY_STREAMING_LOAD :
#endif
ISL_MEMCPY);
}
}
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (need_invalidate_flush && mem_to_img)
intel_flush_range(img_ptr + start_tile_B, end_tile_B - start_tile_B);
#endif
}
static uint64_t
calc_mem_row_pitch_B(const struct isl_surf *surf,
uint64_t api_row_length_px,
const VkExtent3D *extent_px)
{
const struct isl_format_layout *fmt_layout =
isl_format_get_layout(surf->format);
const uint32_t bs = fmt_layout->bpb / 8;
return api_row_length_px != 0 ?
(bs * DIV_ROUND_UP(api_row_length_px, fmt_layout->bw)) :
(bs * DIV_ROUND_UP(extent_px->width, fmt_layout->bw));
}
static uint64_t
calc_mem_height_pitch_B(const struct isl_surf *surf,
uint64_t row_pitch_B,
uint64_t api_height_px,
const VkExtent3D *extent_px)
{
const struct isl_format_layout *fmt_layout =
isl_format_get_layout(surf->format);
return api_height_px != 0 ?
(row_pitch_B * DIV_ROUND_UP(api_height_px, fmt_layout->bh)) :
(row_pitch_B * DIV_ROUND_UP(extent_px->height, fmt_layout->bh));
}
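To make the pitch math concrete, here is a worked example with assumed numbers, using BC1 again (64 bpb, so 8 bytes per 4x4 block):

/* Example (assumed values): a tightly packed copy (memoryRowLength = 0,
 * memoryImageHeight = 0) of a 256x128 region of a BC1 image:
 *
 *   bs                 = 64 bpb / 8                  = 8 B
 *   mem_row_pitch_B    = 8 * DIV_ROUND_UP(256, 4)    = 512 B
 *   mem_height_pitch_B = 512 * DIV_ROUND_UP(128, 4)  = 16384 B
 *
 * Non-zero memoryRowLength/memoryImageHeight substitute the caller's
 * padded dimensions into the same formulas.
 */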
VkResult
anv_CopyMemoryToImageEXT(
VkDevice _device,
const VkCopyMemoryToImageInfoEXT* pCopyMemoryToImageInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_image, image, pCopyMemoryToImageInfo->dstImage);
for (uint32_t r = 0; r < pCopyMemoryToImageInfo->regionCount; r++) {
const VkMemoryToImageCopyEXT *region =
&pCopyMemoryToImageInfo->pRegions[r];
const uint32_t plane =
anv_image_aspect_to_plane(image, region->imageSubresource.aspectMask);
const struct anv_surface *anv_surf =
&image->planes[plane].primary_surface;
const struct isl_surf *surf = &anv_surf->isl;
const struct anv_image_binding *binding =
&image->bindings[anv_surf->memory_range.binding];
assert(binding->host_map != NULL);
/* Memory distance between each row */
uint64_t mem_row_pitch_B =
calc_mem_row_pitch_B(surf, region->memoryRowLength,
&region->imageExtent);
/* Memory distance between each slice (one 3D depth slice or one array layer) */
uint64_t mem_height_pitch_B =
calc_mem_height_pitch_B(surf, mem_row_pitch_B,
region->memoryImageHeight,
&region->imageExtent);
VkOffset3D offset_el =
vk_offset3d_to_el(surf->format, region->imageOffset);
VkExtent3D extent_el =
vk_extent3d_to_el(surf->format, region->imageExtent);
for (uint32_t a = 0; a < region->imageSubresource.layerCount; a++) {
for (uint32_t z = 0; z < region->imageExtent.depth; z++) {
if ((pCopyMemoryToImageInfo->flags &
VK_HOST_IMAGE_COPY_MEMCPY_EXT) &&
anv_image_can_host_memcpy(image)) {
anv_memcpy_image_memory(device, surf, binding,
anv_surf->memory_range.offset,
(void *)region->pHostPointer,
region->imageSubresource.mipLevel,
region->imageSubresource.baseArrayLayer,
region->imageOffset.z,
a, z, true /* mem_to_img */);
} else {
anv_copy_image_memory(device, surf,
binding, anv_surf->memory_range.offset,
(void *)region->pHostPointer,
mem_row_pitch_B,
mem_height_pitch_B,
&offset_el,
&extent_el,
region->imageSubresource.mipLevel,
region->imageSubresource.baseArrayLayer,
region->imageOffset.z,
a, z, true /* mem_to_img */);
}
}
}
}
return VK_SUCCESS;
}
VkResult
anv_CopyImageToMemoryEXT(
VkDevice _device,
const VkCopyImageToMemoryInfoEXT* pCopyImageToMemoryInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_image, image, pCopyImageToMemoryInfo->srcImage);
for (uint32_t r = 0; r < pCopyImageToMemoryInfo->regionCount; r++) {
const VkImageToMemoryCopyEXT *region =
&pCopyImageToMemoryInfo->pRegions[r];
const uint32_t plane =
anv_image_aspect_to_plane(image, region->imageSubresource.aspectMask);
const struct anv_surface *anv_surf =
&image->planes[plane].primary_surface;
const struct isl_surf *surf = &anv_surf->isl;
const struct anv_image_binding *binding =
&image->bindings[anv_surf->memory_range.binding];
assert(binding->host_map != NULL);
VkOffset3D offset_el =
vk_offset3d_to_el(surf->format, region->imageOffset);
VkExtent3D extent_el =
vk_extent3d_to_el(surf->format, region->imageExtent);
/* Memory distance between each row */
uint64_t mem_row_pitch_B =
calc_mem_row_pitch_B(surf, region->memoryRowLength,
&region->imageExtent);
/* Memory distance between each slice (one 3D depth slice or one array layer) */
uint64_t mem_height_pitch_B =
calc_mem_height_pitch_B(surf, mem_row_pitch_B,
region->memoryImageHeight,
&region->imageExtent);
for (uint32_t a = 0; a < region->imageSubresource.layerCount; a++) {
for (uint32_t z = 0; z < region->imageExtent.depth; z++) {
if ((pCopyImageToMemoryInfo->flags &
VK_HOST_IMAGE_COPY_MEMCPY_EXT) &&
anv_image_can_host_memcpy(image)) {
anv_memcpy_image_memory(device, surf, binding,
anv_surf->memory_range.offset,
region->pHostPointer,
region->imageSubresource.mipLevel,
region->imageSubresource.baseArrayLayer,
region->imageOffset.z,
a, z, false /* mem_to_img */);
} else {
anv_copy_image_memory(device, surf,
binding, anv_surf->memory_range.offset,
region->pHostPointer,
mem_row_pitch_B,
mem_height_pitch_B,
&offset_el,
&extent_el,
region->imageSubresource.mipLevel,
region->imageSubresource.baseArrayLayer,
region->imageOffset.z,
a, z, false /* mem_to_img */);
}
}
}
}
return VK_SUCCESS;
}
VkResult
anv_CopyImageToImageEXT(
VkDevice _device,
const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToImageInfo->srcImage);
ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageToImageInfo->dstImage);
/* Work with a tile's worth of data */
void *tmp_map = vk_alloc(&device->vk.alloc, 4096, 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (tmp_map == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
for (uint32_t r = 0; r < pCopyImageToImageInfo->regionCount; r++) {
const VkImageCopy2 *region = &pCopyImageToImageInfo->pRegions[r];
const uint32_t src_plane =
anv_image_aspect_to_plane(src_image,
region->srcSubresource.aspectMask);
const uint32_t dst_plane =
anv_image_aspect_to_plane(dst_image,
region->srcSubresource.aspectMask);
const struct anv_surface *src_anv_surf =
&src_image->planes[src_plane].primary_surface;
const struct anv_surface *dst_anv_surf =
&dst_image->planes[dst_plane].primary_surface;
const struct isl_surf *src_surf = &src_anv_surf->isl;
const struct isl_surf *dst_surf = &dst_anv_surf->isl;
const struct anv_image_binding *src_binding =
&src_image->bindings[src_anv_surf->memory_range.binding];
const struct anv_image_binding *dst_binding =
&dst_image->bindings[dst_anv_surf->memory_range.binding];
struct isl_tile_info src_tile;
struct isl_tile_info dst_tile;
isl_surf_get_tile_info(src_surf, &src_tile);
isl_surf_get_tile_info(dst_surf, &dst_tile);
uint32_t tile_width_B;
uint32_t tile_width_el, tile_height_el;
if (src_tile.phys_extent_B.w > dst_tile.phys_extent_B.w) {
tile_width_B = src_tile.phys_extent_B.w;
tile_width_el = src_tile.logical_extent_el.w;
tile_height_el = src_tile.logical_extent_el.h;
} else {
tile_width_B = dst_tile.phys_extent_B.w;
tile_width_el = dst_tile.logical_extent_el.w;
tile_height_el = dst_tile.logical_extent_el.h;
}
/* There is no requirement that the extent be aligned to the texel block
* size.
*/
VkOffset3D src_offset_el =
vk_offset3d_to_el(src_surf->format, region->srcOffset);
VkOffset3D dst_offset_el =
vk_offset3d_to_el(src_surf->format, region->dstOffset);
VkExtent3D extent_el =
vk_extent3d_to_el(src_surf->format, region->extent);
/* linear-to-linear case */
if (tile_width_el == 1 && tile_height_el == 1) {
tile_width_el = MIN2(4096 / (src_tile.format_bpb / 8),
extent_el.width);
tile_height_el = 4096 / (tile_width_el * (src_tile.format_bpb / 8));
tile_width_B = tile_width_el * src_tile.format_bpb / 8;
}
for (uint32_t a = 0; a < region->srcSubresource.layerCount; a++) {
for (uint32_t z = 0; z < region->extent.depth; z++) {
for (uint32_t y_el = 0; y_el < extent_el.height; y_el += tile_height_el) {
for (uint32_t x_el = 0; x_el < extent_el.width; x_el += tile_width_el) {
VkOffset3D src_offset = {
.x = src_offset_el.x + x_el,
.y = src_offset_el.y + y_el,
};
VkOffset3D dst_offset = {
.x = dst_offset_el.x + x_el,
.y = dst_offset_el.y + y_el,
};
VkExtent3D extent = {
.width = MIN2(extent_el.width - src_offset.x,
tile_width_el),
.height = MIN2(extent_el.height - src_offset.y,
tile_height_el),
.depth = 1,
};
anv_copy_image_memory(device, src_surf,
src_binding,
src_anv_surf->memory_range.offset,
tmp_map,
tile_width_B, 0,
&src_offset, &extent,
region->srcSubresource.mipLevel,
region->srcSubresource.baseArrayLayer,
region->srcOffset.z,
a, z,
false /* mem_to_img */);
anv_copy_image_memory(device, dst_surf,
dst_binding,
dst_anv_surf->memory_range.offset,
tmp_map,
tile_width_B, 0,
&dst_offset, &extent,
region->dstSubresource.mipLevel,
region->dstSubresource.baseArrayLayer,
region->dstOffset.z,
a, z,
true /* mem_to_img */);
}
}
}
}
}
vk_free(&device->vk.alloc, tmp_map);
return VK_SUCCESS;
}
VkResult
anv_TransitionImageLayoutEXT(
VkDevice device,
uint32_t transitionCount,
const VkHostImageLayoutTransitionInfoEXT* pTransitions)
{
/* Our layout transitions are mostly about resolving the auxiliary surface
* into the main surface. Since we disable the auxiliary surface on
* host-transfer images, there is nothing for us to do here.
*/
return VK_SUCCESS;
}
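
For reference, the application-side sequence these entry points serve — transition the layout on the host, then copy — looks roughly like the sketch below. The helper and its parameters are illustrative assumptions; the image must have been created with VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT and tightly packed pixel data is assumed:

#include <vulkan/vulkan.h>

/* Hypothetical usage: upload pixel data without recording a command buffer
 * or submitting to a queue. */
static VkResult
upload_on_host(VkDevice dev, VkImage image, const void *pixels,
               uint32_t width, uint32_t height)
{
   const VkHostImageLayoutTransitionInfoEXT transition = {
      .sType = VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO_EXT,
      .image = image,
      .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
      .newLayout = VK_IMAGE_LAYOUT_GENERAL,
      .subresourceRange = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .levelCount = 1,
         .layerCount = 1,
      },
   };
   VkResult result = vkTransitionImageLayoutEXT(dev, 1, &transition);
   if (result != VK_SUCCESS)
      return result;

   const VkMemoryToImageCopyEXT region = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT,
      .pHostPointer = pixels,
      /* memoryRowLength/memoryImageHeight of 0 mean tightly packed. */
      .imageSubresource = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .layerCount = 1,
      },
      .imageExtent = { width, height, 1 },
   };
   const VkCopyMemoryToImageInfoEXT info = {
      .sType = VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO_EXT,
      .dstImage = image,
      .dstImageLayout = VK_IMAGE_LAYOUT_GENERAL,
      .regionCount = 1,
      .pRegions = &region,
   };
   return vkCopyMemoryToImageEXT(dev, &info);
}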

View File

@@ -215,6 +215,7 @@ get_device_extensions(const struct anv_physical_device *device,
.EXT_global_priority_query = device->max_context_priority >=
VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
.EXT_graphics_pipeline_library = !debug_get_bool_option("ANV_NO_GPL", false),
.EXT_host_image_copy = !device->emu_astc_ldr,
.EXT_host_query_reset = true,
.EXT_image_2d_view_of_3d = true,
/* Because of Xe2 PAT selected compression and the Vulkan spec
@@ -832,6 +833,9 @@ get_features(const struct anv_physical_device *pdevice,
/* VK_EXT_pipeline_protected_access */
.pipelineProtectedAccess = true,
/* VK_EXT_host_image_copy */
.hostImageCopy = true,
};
/* The new DOOM and Wolfenstein games require depthBounds without
@@ -1531,6 +1535,77 @@ get_properties(const struct anv_physical_device *pdevice,
props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
}
/* VK_EXT_host_image_copy */
{
static const VkImageLayout supported_layouts[] = {
VK_IMAGE_LAYOUT_GENERAL,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR,
VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR,
};
props->pCopySrcLayouts = (VkImageLayout *) supported_layouts;
props->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
props->pCopyDstLayouts = (VkImageLayout *) supported_layouts;
props->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
/* This UUID essentially tells you whether you can share an optimally
* tiled image with another driver. Many of the tiling decisions are based on:
*
* - device generation (different tilings based on generations)
* - device workarounds
* - driver build (as we implement workarounds or performance tunings,
* the tiling decision changes)
*
* So we're using a hash of the platform field + driver_build_sha1.
*
* Unfortunately there is a HW issue on SKL GT4 that makes it use some
* different tilings sometimes (see isl_gfx7.c).
*/
{
struct mesa_sha1 sha1_ctx;
uint8_t sha1[20];
_mesa_sha1_init(&sha1_ctx);
_mesa_sha1_update(&sha1_ctx, pdevice->driver_build_sha1,
sizeof(pdevice->driver_build_sha1));
_mesa_sha1_update(&sha1_ctx, &pdevice->info.platform,
sizeof(pdevice->info.platform));
if (pdevice->info.platform == INTEL_PLATFORM_SKL &&
pdevice->info.gt == 4) {
_mesa_sha1_update(&sha1_ctx, &pdevice->info.gt,
sizeof(pdevice->info.gt));
}
_mesa_sha1_final(&sha1_ctx, sha1);
assert(ARRAY_SIZE(sha1) >= VK_UUID_SIZE);
memcpy(props->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
}
/* Systems without ReBAR cannot map all memory types on the host, which
* affects the memory types an image can use for host memory copies.
*
* Systems with compressed memory types likewise cannot expose all image
* memory types for host image copies.
*/
props->identicalMemoryTypeRequirements = !(pdevice->has_small_bar ||
pdevice->memory.compressed_mem_types != 0);
}
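On the application side, this property tells you whether adding host-transfer usage can change an image's memoryTypeBits; when it is VK_FALSE, memory requirements must be re-queried. A hedged sketch of the check (hypothetical helper name):

#include <stdbool.h>
#include <vulkan/vulkan.h>

/* Hypothetical helper: if this returns true, an app must re-query memory
 * requirements when it adds VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT. */
static bool
host_usage_may_change_memory_types(VkPhysicalDevice pdev)
{
   VkPhysicalDeviceHostImageCopyPropertiesEXT hic_props = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_PROPERTIES_EXT,
   };
   VkPhysicalDeviceProperties2 props = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
      .pNext = &hic_props,
   };
   vkGetPhysicalDeviceProperties2(pdev, &props);
   return hic_props.identicalMemoryTypeRequirements == VK_FALSE;
}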
/* VK_EXT_legacy_vertex_attributes */
{
props->nativeUnalignedPerformance = true;

View File

@@ -5358,6 +5358,8 @@ struct anv_image {
struct anv_address address;
struct anv_sparse_binding_data sparse_data;
void *host_map;
uint64_t map_delta;
uint64_t map_size;
} bindings[ANV_IMAGE_MEMORY_BINDING_END];
/**
@@ -5462,6 +5464,22 @@ anv_image_format_is_d16_or_s8(const struct anv_image *image)
image->vk.format == VK_FORMAT_S8_UINT;
}
static inline bool
anv_image_can_host_memcpy(const struct anv_image *image)
{
const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
struct isl_tile_info tile_info;
isl_surf_get_tile_info(surf, &tile_info);
const bool array_pitch_aligned_to_tile =
surf->array_pitch_el_rows % tile_info.logical_extent_el.height == 0;
return image->vk.tiling != VK_IMAGE_TILING_LINEAR &&
image->n_planes == 1 &&
array_pitch_aligned_to_tile &&
image->vk.mip_levels == 1;
}
/* The ordering of this enum is important */
enum anv_fast_clear_type {
/** Image does not have/support any fast-clear blocks */

View File

@@ -158,6 +158,7 @@ libanv_files = files(
'anv_formats.c',
'anv_genX.h',
'anv_image.c',
'anv_image_host_copy.c',
'anv_image_view.c',
'anv_internal_kernels.c',
'anv_internal_kernels.h',