vulkan/wsi/radv: add initial prime support (v1.1)

This is a complete rewrite of my previous rfc patches.

This adds the ability to present on a different GPU than the one doing
the rendering, using a driver-side operation that copies from the tiled
image to a linear shared image.

This does prime support completely in the swapchain present code,
and each queue has a precreated command buffer for each image
and for each queue family. This means presenting should work
on graphics and compute queues, and on transfer queues in the future.

v1.1: initialise needs_linear_copy in swapchain.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tested-by: Mike Lothian <mike@fireburn.co.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Dave Airlie
2016-11-23 12:59:55 +10:00
parent 336b05c49a
commit f695735ed6
10 changed files with 225 additions and 21 deletions

View File

@@ -208,6 +208,9 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
uint32_t region_count,
const VkImageResolve *regions);
void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
struct radv_image *linear_image);
#ifdef __cplusplus
}
#endif

View File

@@ -430,3 +430,23 @@ void radv_CmdCopyImage(
meta_copy_image(cmd_buffer, src_image, dest_image,
regionCount, pRegions);
}
void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
struct radv_image *linear_image)
{
struct VkImageCopy image_copy = { 0 };
image_copy.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
image_copy.srcSubresource.layerCount = 1;
image_copy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
image_copy.dstSubresource.layerCount = 1;
image_copy.extent.width = image->extent.width;
image_copy.extent.height = image->extent.height;
image_copy.extent.depth = 1;
meta_copy_image(cmd_buffer, image, linear_image,
1, &image_copy);
}

View File

@@ -24,6 +24,7 @@
*/
#include "radv_private.h"
#include "radv_meta.h"
#include "wsi_common.h"
static const struct wsi_callbacks wsi_cbs = {
@@ -92,7 +93,7 @@ VkResult radv_GetPhysicalDeviceSurfaceSupportKHR(
return iface->get_support(surface, &device->wsi_device,
&device->instance->alloc,
queueFamilyIndex, device->local_fd, pSupported);
queueFamilyIndex, device->local_fd, true, pSupported);
}
VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
@@ -139,6 +140,8 @@ static VkResult
radv_wsi_image_create(VkDevice device_h,
const VkSwapchainCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks* pAllocator,
bool needs_linear_copy,
bool linear,
VkImage *image_p,
VkDeviceMemory *memory_p,
uint32_t *size,
@@ -169,7 +172,7 @@ radv_wsi_image_create(VkDevice device_h,
.arrayLayers = 1,
.samples = 1,
/* FIXME: Need a way to use X tiling to allow scanout */
.tiling = VK_IMAGE_TILING_OPTIMAL,
.tiling = linear ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.flags = 0,
},
@@ -180,14 +183,14 @@ radv_wsi_image_create(VkDevice device_h,
return result;
image = radv_image_from_handle(image_h);
VkDeviceMemory memory_h;
struct radv_device_memory *memory;
result = radv_AllocateMemory(device_h,
&(VkMemoryAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = image->size,
.memoryTypeIndex = 0,
.memoryTypeIndex = linear ? 1 : 0,
},
NULL /* XXX: pAllocator */,
&memory_h);
@@ -198,21 +201,28 @@ radv_wsi_image_create(VkDevice device_h,
radv_BindImageMemory(VK_NULL_HANDLE, image_h, memory_h, 0);
bret = device->ws->buffer_get_fd(device->ws,
memory->bo, &fd);
if (bret == false)
goto fail_alloc_memory;
/*
* return the fd for the image in the no copy mode,
* or the fd for the linear image if a copy is required.
*/
if (!needs_linear_copy || (needs_linear_copy && linear)) {
bret = device->ws->buffer_get_fd(device->ws,
memory->bo, &fd);
if (bret == false)
goto fail_alloc_memory;
*fd_p = fd;
}
{
struct radeon_bo_metadata metadata;
radv_init_metadata(device, image, &metadata);
device->ws->buffer_set_metadata(memory->bo, &metadata);
}
surface = &image->surface;
*image_p = image_h;
*memory_p = memory_h;
*fd_p = fd;
*size = image->size;
*offset = image->offset;
*row_pitch = surface->level[0].pitch_bytes;
@@ -242,6 +252,94 @@ static const struct wsi_image_fns radv_wsi_image_fns = {
.free_wsi_image = radv_wsi_image_free,
};
#define NUM_PRIME_POOLS RADV_QUEUE_TRANSFER
static void
radv_wsi_free_prime_command_buffers(struct radv_device *device,
struct wsi_swapchain *swapchain)
{
const int num_pools = NUM_PRIME_POOLS;
const int num_images = swapchain->image_count;
int i;
for (i = 0; i < num_pools; i++) {
radv_FreeCommandBuffers(radv_device_to_handle(device),
swapchain->cmd_pools[i],
swapchain->image_count,
&swapchain->cmd_buffers[i * num_images]);
radv_DestroyCommandPool(radv_device_to_handle(device),
swapchain->cmd_pools[i],
&swapchain->alloc);
}
}
static VkResult
radv_wsi_create_prime_command_buffers(struct radv_device *device,
const VkAllocationCallbacks *alloc,
struct wsi_swapchain *swapchain)
{
const int num_pools = NUM_PRIME_POOLS;
const int num_images = swapchain->image_count;
int num_cmd_buffers = num_images * num_pools; //TODO bump to MAX_QUEUE_FAMILIES
VkResult result;
int i, j;
swapchain->cmd_buffers = vk_alloc(alloc, (sizeof(VkCommandBuffer) * num_cmd_buffers), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!swapchain->cmd_buffers)
return VK_ERROR_OUT_OF_HOST_MEMORY;
memset(swapchain->cmd_buffers, 0, sizeof(VkCommandBuffer) * num_cmd_buffers);
memset(swapchain->cmd_pools, 0, sizeof(VkCommandPool) * num_pools);
for (i = 0; i < num_pools; i++) {
VkCommandPoolCreateInfo pool_create_info;
pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
pool_create_info.pNext = NULL;
pool_create_info.flags = 0;
pool_create_info.queueFamilyIndex = i;
result = radv_CreateCommandPool(radv_device_to_handle(device),
&pool_create_info, alloc,
&swapchain->cmd_pools[i]);
if (result != VK_SUCCESS)
goto fail;
VkCommandBufferAllocateInfo cmd_buffer_info;
cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
cmd_buffer_info.pNext = NULL;
cmd_buffer_info.commandPool = swapchain->cmd_pools[i];
cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
cmd_buffer_info.commandBufferCount = num_images;
result = radv_AllocateCommandBuffers(radv_device_to_handle(device),
&cmd_buffer_info,
&swapchain->cmd_buffers[i * num_images]);
if (result != VK_SUCCESS)
goto fail;
for (j = 0; j < num_images; j++) {
VkImage image, linear_image;
int idx = (i * num_images) + j;
swapchain->get_image_and_linear(swapchain, j, &image, &linear_image);
VkCommandBufferBeginInfo begin_info = {0};
begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
radv_BeginCommandBuffer(swapchain->cmd_buffers[idx], &begin_info);
radv_blit_to_prime_linear(radv_cmd_buffer_from_handle(swapchain->cmd_buffers[idx]),
radv_image_from_handle(image),
radv_image_from_handle(linear_image));
radv_EndCommandBuffer(swapchain->cmd_buffers[idx]);
}
}
return VK_SUCCESS;
fail:
radv_wsi_free_prime_command_buffers(device, swapchain);
return result;
}
VkResult radv_CreateSwapchainKHR(
VkDevice _device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
@@ -260,6 +358,7 @@ VkResult radv_CreateSwapchainKHR(
alloc = &device->alloc;
VkResult result = iface->create_swapchain(surface, _device,
&device->physical_device->wsi_device,
device->physical_device->local_fd,
pCreateInfo,
alloc, &radv_wsi_image_fns,
&swapchain);
@@ -274,6 +373,13 @@ VkResult radv_CreateSwapchainKHR(
for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++)
swapchain->fences[i] = VK_NULL_HANDLE;
if (swapchain->needs_linear_copy) {
result = radv_wsi_create_prime_command_buffers(device, alloc,
swapchain);
if (result != VK_SUCCESS)
return result;
}
*pSwapchain = wsi_swapchain_to_handle(swapchain);
return VK_SUCCESS;
@@ -301,6 +407,9 @@ void radv_DestroySwapchainKHR(
radv_DestroyFence(_device, swapchain->fences[i], pAllocator);
}
if (swapchain->needs_linear_copy)
radv_wsi_free_prime_command_buffers(device, swapchain);
swapchain->destroy(swapchain, alloc);
}
@@ -347,7 +456,7 @@ VkResult radv_QueuePresentKHR(
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
RADV_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
struct radeon_winsys_cs *cs;
assert(radv_device_from_handle(swapchain->device) == queue->device);
if (swapchain->fences[0] == VK_NULL_HANDLE) {
result = radv_CreateFence(radv_device_to_handle(queue->device),
@@ -362,11 +471,16 @@ VkResult radv_QueuePresentKHR(
1, &swapchain->fences[0]);
}
if (swapchain->needs_linear_copy) {
int idx = (queue->queue_family_index * swapchain->image_count) + pPresentInfo->pImageIndices[i];
cs = radv_cmd_buffer_from_handle(swapchain->cmd_buffers[idx])->cs;
} else
cs = queue->device->empty_cs[queue->queue_family_index];
RADV_FROM_HANDLE(radv_fence, fence, swapchain->fences[0]);
struct radeon_winsys_fence *base_fence = fence->fence;
struct radeon_winsys_ctx *ctx = queue->hw_ctx;
queue->device->ws->cs_submit(ctx, queue->queue_idx,
&queue->device->empty_cs[queue->queue_family_index],
&cs,
1, NULL, NULL,
(struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores,
pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence);

View File

@@ -46,7 +46,7 @@ VkBool32 radv_GetPhysicalDeviceXcbPresentationSupportKHR(
&device->wsi_device,
&device->instance->alloc,
queueFamilyIndex,
device->local_fd,
device->local_fd, true,
connection, visual_id);
}
@@ -62,7 +62,7 @@ VkBool32 radv_GetPhysicalDeviceXlibPresentationSupportKHR(
&device->wsi_device,
&device->instance->alloc,
queueFamilyIndex,
device->local_fd,
device->local_fd, true,
XGetXCBConnection(dpy), visualID);
}

View File

@@ -94,7 +94,7 @@ VkResult anv_GetPhysicalDeviceSurfaceSupportKHR(
return iface->get_support(surface, &device->wsi_device,
&device->instance->alloc,
queueFamilyIndex, device->local_fd, pSupported);
queueFamilyIndex, device->local_fd, false, pSupported);
}
VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
@@ -142,6 +142,8 @@ static VkResult
x11_anv_wsi_image_create(VkDevice device_h,
const VkSwapchainCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks* pAllocator,
bool different_gpu,
bool linear,
VkImage *image_p,
VkDeviceMemory *memory_p,
uint32_t *size,
@@ -272,6 +274,7 @@ VkResult anv_CreateSwapchainKHR(
alloc = &device->alloc;
VkResult result = iface->create_swapchain(surface, _device,
&device->instance->physicalDevice.wsi_device,
device->instance->physicalDevice.local_fd,
pCreateInfo,
alloc, &anv_wsi_image_fns,
&swapchain);

View File

@@ -42,7 +42,7 @@ VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR(
&device->wsi_device,
&device->instance->alloc,
queueFamilyIndex,
device->local_fd,
device->local_fd, false,
connection, visual_id);
}
@@ -58,7 +58,7 @@ VkBool32 anv_GetPhysicalDeviceXlibPresentationSupportKHR(
&device->wsi_device,
&device->instance->alloc,
queueFamilyIndex,
device->local_fd,
device->local_fd, false,
XGetXCBConnection(dpy), visualID);
}

View File

@@ -35,6 +35,8 @@ struct wsi_image_fns {
VkResult (*create_wsi_image)(VkDevice device_h,
const VkSwapchainCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
bool needs_linear_copy,
bool linear,
VkImage *image_p,
VkDeviceMemory *memory_p,
uint32_t *size_p,
@@ -53,8 +55,11 @@ struct wsi_swapchain {
VkAllocationCallbacks alloc;
const struct wsi_image_fns *image_fns;
VkFence fences[3];
VkCommandBuffer *cmd_buffers;
VkCommandPool cmd_pools[3];
VkPresentModeKHR present_mode;
uint32_t image_count;
bool needs_linear_copy;
VkResult (*destroy)(struct wsi_swapchain *swapchain,
const VkAllocationCallbacks *pAllocator);
@@ -65,6 +70,7 @@ struct wsi_swapchain {
uint32_t *image_index);
VkResult (*queue_present)(struct wsi_swapchain *swap_chain,
uint32_t image_index);
void (*get_image_and_linear)(struct wsi_swapchain *swapchain, int imageIndex, VkImage *image, VkImage *linear_image);
};
struct wsi_interface {
@@ -73,6 +79,7 @@ struct wsi_interface {
const VkAllocationCallbacks *alloc,
uint32_t queueFamilyIndex,
int local_fd,
bool can_handle_different_gpu,
VkBool32* pSupported);
VkResult (*get_capabilities)(VkIcdSurfaceBase *surface,
VkSurfaceCapabilitiesKHR* pSurfaceCapabilities);
@@ -86,6 +93,7 @@ struct wsi_interface {
VkResult (*create_swapchain)(VkIcdSurfaceBase *surface,
VkDevice device,
struct wsi_device *wsi_device,
int local_fd,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
const struct wsi_image_fns *image_fns,

View File

@@ -352,6 +352,7 @@ wsi_wl_surface_get_support(VkIcdSurfaceBase *surface,
const VkAllocationCallbacks *alloc,
uint32_t queueFamilyIndex,
int local_fd,
bool can_handle_different_gpu,
VkBool32* pSupported)
{
*pSupported = true;
@@ -637,6 +638,8 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain,
result = chain->base.image_fns->create_wsi_image(vk_device,
pCreateInfo,
pAllocator,
false,
false,
&image->image,
&image->memory,
&size,
@@ -694,6 +697,7 @@ static VkResult
wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
VkDevice device,
struct wsi_device *wsi_device,
int local_fd,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
const struct wsi_image_fns *image_fns,
@@ -724,6 +728,7 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
chain->base.image_fns = image_fns;
chain->base.present_mode = pCreateInfo->presentMode;
chain->base.image_count = num_images;
chain->base.needs_linear_copy = false;
chain->surface = surface->surface;
chain->extent = pCreateInfo->imageExtent;
chain->vk_format = pCreateInfo->imageFormat;

View File

@@ -317,6 +317,7 @@ VkBool32 wsi_get_physical_device_xcb_presentation_support(
VkAllocationCallbacks *alloc,
uint32_t queueFamilyIndex,
int fd,
bool can_handle_different_gpu,
xcb_connection_t* connection,
xcb_visualid_t visual_id)
{
@@ -332,8 +333,9 @@ VkBool32 wsi_get_physical_device_xcb_presentation_support(
return false;
}
if (!wsi_x11_check_dri3_compatible(connection, fd))
return false;
if (!can_handle_different_gpu)
if (!wsi_x11_check_dri3_compatible(connection, fd))
return false;
unsigned visual_depth;
if (!connection_get_visualtype(connection, visual_id, &visual_depth))
@@ -369,6 +371,7 @@ x11_surface_get_support(VkIcdSurfaceBase *icd_surface,
const VkAllocationCallbacks *alloc,
uint32_t queueFamilyIndex,
int local_fd,
bool can_handle_different_gpu,
VkBool32* pSupported)
{
xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
@@ -386,8 +389,9 @@ x11_surface_get_support(VkIcdSurfaceBase *icd_surface,
return VK_SUCCESS;
}
if (!wsi_x11_check_dri3_compatible(conn, local_fd))
return false;
if (!can_handle_different_gpu)
if (!wsi_x11_check_dri3_compatible(conn, local_fd))
return false;
unsigned visual_depth;
if (!get_visualtype_for_window(conn, window, &visual_depth)) {
@@ -550,7 +554,9 @@ VkResult wsi_create_xlib_surface(const VkAllocationCallbacks *pAllocator,
struct x11_image {
VkImage image;
VkImage linear_image; // for prime
VkDeviceMemory memory;
VkDeviceMemory linear_memory; // for prime
xcb_pixmap_t pixmap;
bool busy;
struct xshmfence * shm_fence;
@@ -607,6 +613,15 @@ x11_get_images(struct wsi_swapchain *anv_chain,
return result;
}
/*
 * Look up swapchain image `imageIndex` and hand back both of its handles:
 * the regular image through `image` and the prime linear image through
 * `linear_image`. No bounds check is performed on imageIndex.
 */
static void
x11_get_image_and_linear(struct wsi_swapchain *drv_chain,
                         int imageIndex, VkImage *image, VkImage *linear_image)
{
   struct x11_swapchain *x11_chain = (struct x11_swapchain *)drv_chain;
   struct x11_image *entry = &x11_chain->images[imageIndex];

   *image = entry->image;
   *linear_image = entry->linear_image;
}
static VkResult
x11_handle_dri3_present_event(struct x11_swapchain *chain,
xcb_present_generic_event_t *event)
@@ -890,6 +905,8 @@ x11_image_init(VkDevice device_h, struct x11_swapchain *chain,
result = chain->base.image_fns->create_wsi_image(device_h,
pCreateInfo,
pAllocator,
chain->base.needs_linear_copy,
false,
&image->image,
&image->memory,
&size,
@@ -899,6 +916,25 @@ x11_image_init(VkDevice device_h, struct x11_swapchain *chain,
if (result != VK_SUCCESS)
return result;
if (chain->base.needs_linear_copy) {
result = chain->base.image_fns->create_wsi_image(device_h,
pCreateInfo,
pAllocator,
chain->base.needs_linear_copy,
true,
&image->linear_image,
&image->linear_memory,
&size,
&offset,
&row_pitch,
&fd);
if (result != VK_SUCCESS) {
chain->base.image_fns->free_wsi_image(device_h, pAllocator,
image->image, image->memory);
return result;
}
}
image->pixmap = xcb_generate_id(chain->conn);
cookie =
@@ -939,8 +975,12 @@ fail_pixmap:
cookie = xcb_free_pixmap(chain->conn, image->pixmap);
xcb_discard_reply(chain->conn, cookie.sequence);
if (chain->base.needs_linear_copy) {
chain->base.image_fns->free_wsi_image(device_h, pAllocator,
image->linear_image, image->linear_memory);
}
chain->base.image_fns->free_wsi_image(device_h, pAllocator,
image->image, image->memory);
image->image, image->memory);
return result;
}
@@ -959,6 +999,10 @@ x11_image_finish(struct x11_swapchain *chain,
cookie = xcb_free_pixmap(chain->conn, image->pixmap);
xcb_discard_reply(chain->conn, cookie.sequence);
if (chain->base.needs_linear_copy) {
chain->base.image_fns->free_wsi_image(chain->base.device, pAllocator,
image->linear_image, image->linear_memory);
}
chain->base.image_fns->free_wsi_image(chain->base.device, pAllocator,
image->image, image->memory);
}
@@ -997,6 +1041,7 @@ static VkResult
x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
VkDevice device,
struct wsi_device *wsi_device,
int local_fd,
const VkSwapchainCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks* pAllocator,
const struct wsi_image_fns *image_fns,
@@ -1027,6 +1072,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
chain->base.device = device;
chain->base.destroy = x11_swapchain_destroy;
chain->base.get_images = x11_get_images;
chain->base.get_image_and_linear = x11_get_image_and_linear;
chain->base.acquire_next_image = x11_acquire_next_image;
chain->base.queue_present = x11_queue_present;
chain->base.image_fns = image_fns;
@@ -1043,6 +1089,10 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
free(geometry);
chain->base.needs_linear_copy = false;
if (!wsi_x11_check_dri3_compatible(conn, local_fd))
chain->base.needs_linear_copy = true;
chain->event_id = xcb_generate_id(chain->conn);
xcb_present_select_input(chain->conn, chain->event_id, chain->window,
XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY |

View File

@@ -30,6 +30,7 @@ VkBool32 wsi_get_physical_device_xcb_presentation_support(
VkAllocationCallbacks *alloc,
uint32_t queueFamilyIndex,
int local_fd,
bool can_handle_different_gpu,
xcb_connection_t* connection,
xcb_visualid_t visual_id);