radv/winsys: Set winsys bo priority on creation.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Bas Nieuwenhuizen
2019-01-28 00:28:05 +01:00
parent 3a8d6c0880
commit ead54d4a42
12 changed files with 82 additions and 29 deletions

View File

@@ -374,7 +374,8 @@ radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer,
RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS|
RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_32BIT);
RADEON_FLAG_32BIT,
RADV_BO_PRIORITY_UPLOAD_BUFFER);
if (!bo) {
cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;

View File

@@ -63,7 +63,8 @@ radv_init_trace(struct radv_device *device)
device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8,
RADEON_DOMAIN_VRAM,
RADEON_FLAG_CPU_ACCESS|
RADEON_FLAG_NO_INTERPROCESS_SHARING);
RADEON_FLAG_NO_INTERPROCESS_SHARING,
RADV_BO_PRIORITY_UPLOAD_BUFFER);
if (!device->trace_bo)
return false;

View File

@@ -598,7 +598,8 @@ VkResult radv_CreateDescriptorPool(
RADEON_DOMAIN_VRAM,
RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_READ_ONLY |
RADEON_FLAG_32BIT);
RADEON_FLAG_32BIT,
RADV_BO_PRIORITY_DESCRIPTOR);
pool->mapped_ptr = (uint8_t*)device->ws->buffer_map(pool->bo);
}
pool->size = bo_size;

View File

@@ -2373,7 +2373,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
scratch_size,
4096,
RADEON_DOMAIN_VRAM,
ring_bo_flags);
ring_bo_flags,
RADV_BO_PRIORITY_SCRATCH);
if (!scratch_bo)
goto fail;
} else
@@ -2384,7 +2385,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
compute_scratch_size,
4096,
RADEON_DOMAIN_VRAM,
ring_bo_flags);
ring_bo_flags,
RADV_BO_PRIORITY_SCRATCH);
if (!compute_scratch_bo)
goto fail;
@@ -2396,7 +2398,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
esgs_ring_size,
4096,
RADEON_DOMAIN_VRAM,
ring_bo_flags);
ring_bo_flags,
RADV_BO_PRIORITY_SCRATCH);
if (!esgs_ring_bo)
goto fail;
} else {
@@ -2409,7 +2412,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
gsvs_ring_size,
4096,
RADEON_DOMAIN_VRAM,
ring_bo_flags);
ring_bo_flags,
RADV_BO_PRIORITY_SCRATCH);
if (!gsvs_ring_bo)
goto fail;
} else {
@@ -2422,7 +2426,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
tess_offchip_ring_offset + tess_offchip_ring_size,
256,
RADEON_DOMAIN_VRAM,
ring_bo_flags);
ring_bo_flags,
RADV_BO_PRIORITY_SCRATCH);
if (!tess_rings_bo)
goto fail;
} else {
@@ -2450,7 +2455,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
RADEON_DOMAIN_VRAM,
RADEON_FLAG_CPU_ACCESS |
RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_READ_ONLY);
RADEON_FLAG_READ_ONLY,
RADV_BO_PRIORITY_DESCRIPTOR);
if (!descriptor_bo)
goto fail;
} else
@@ -3088,7 +3094,7 @@ static VkResult radv_alloc_memory(struct radv_device *device,
import_info->handleType ==
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
NULL, NULL);
RADV_BO_PRIORITY_DEFAULT, NULL, NULL);
if (!mem->bo) {
result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
goto fail;
@@ -3099,7 +3105,8 @@ static VkResult radv_alloc_memory(struct radv_device *device,
assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
pAllocateInfo->allocationSize);
pAllocateInfo->allocationSize,
RADV_BO_PRIORITY_DEFAULT);
if (!mem->bo) {
result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
goto fail;
@@ -3126,7 +3133,7 @@ static VkResult radv_alloc_memory(struct radv_device *device,
flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
domain, flags);
domain, flags, RADV_BO_PRIORITY_DEFAULT);
if (!mem->bo) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -3886,7 +3893,8 @@ VkResult radv_CreateEvent(
event->bo = device->ws->buffer_create(device->ws, 8, 8,
RADEON_DOMAIN_GTT,
RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
RADV_BO_PRIORITY_FENCE);
if (!event->bo) {
vk_free2(&device->alloc, pAllocator, event);
return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
@@ -3972,7 +3980,8 @@ VkResult radv_CreateBuffer(
if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
buffer->bo = device->ws->buffer_create(device->ws,
align64(buffer->size, 4096),
4096, 0, RADEON_FLAG_VIRTUAL);
4096, 0, RADEON_FLAG_VIRTUAL,
RADV_BO_PRIORITY_VIRTUAL);
if (!buffer->bo) {
vk_free2(&device->alloc, pAllocator, buffer);
return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);

View File

@@ -1046,7 +1046,7 @@ radv_image_create(VkDevice _device,
image->offset = 0;
image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
0, RADEON_FLAG_VIRTUAL);
0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
if (!image->bo) {
vk_free2(&device->alloc, alloc, image);
return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);

View File

@@ -1061,7 +1061,8 @@ VkResult radv_CreateQueryPool(
pool->size += 4 * pCreateInfo->queryCount;
pool->bo = device->ws->buffer_create(device->ws, pool->size,
64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING);
64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING,
RADV_BO_PRIORITY_QUERY_POOL);
if (!pool->bo) {
vk_free2(&device->alloc, pAllocator, pool);

View File

@@ -188,6 +188,29 @@ struct radv_winsys_bo_list {
unsigned count;
};
/* Buffer-object priorities passed to the winsys when a BO is created.
 * The kernel effectively accepts values in the range 0-31; the fixed
 * functionality buffers below are given explicit slots in that range.
 * Entries are listed in ascending numeric order. */
enum {
	/* Virtual buffers never use their priority, so they get 0. */
	RADV_BO_PRIORITY_VIRTUAL = 0,

	RADV_BO_PRIORITY_DEFAULT = 14,
	RADV_BO_PRIORITY_APPLICATION_MAX = 28,

	/* Ought to sit considerably below most of the driver-internal
	 * entries that follow, but how far below is hard to say without
	 * knowing what applications assign. Kept pretty high since it is
	 * GTT anyway. */
	RADV_BO_PRIORITY_QUERY_POOL = 29,

	RADV_BO_PRIORITY_DESCRIPTOR = 30,
	RADV_BO_PRIORITY_FENCE = 30,
	RADV_BO_PRIORITY_UPLOAD_BUFFER = 30,

	RADV_BO_PRIORITY_CS = 31,
	RADV_BO_PRIORITY_SCRATCH = 31,
	RADV_BO_PRIORITY_SHADER = 31,
};
struct radeon_winsys {
void (*destroy)(struct radeon_winsys *ws);
@@ -206,17 +229,20 @@ struct radeon_winsys {
uint64_t size,
unsigned alignment,
enum radeon_bo_domain domain,
enum radeon_bo_flag flags);
enum radeon_bo_flag flags,
unsigned priority);
void (*buffer_destroy)(struct radeon_winsys_bo *bo);
void *(*buffer_map)(struct radeon_winsys_bo *bo);
struct radeon_winsys_bo *(*buffer_from_ptr)(struct radeon_winsys *ws,
void *pointer,
uint64_t size);
uint64_t size,
unsigned priority);
struct radeon_winsys_bo *(*buffer_from_fd)(struct radeon_winsys *ws,
int fd,
unsigned priority,
unsigned *stride, unsigned *offset);
bool (*buffer_get_fd)(struct radeon_winsys *ws,

View File

@@ -395,7 +395,8 @@ radv_alloc_shader_memory(struct radv_device *device,
RADEON_DOMAIN_VRAM,
RADEON_FLAG_NO_INTERPROCESS_SHARING |
(device->physical_device->cpdma_prefetch_writes_memory ?
0 : RADEON_FLAG_READ_ONLY));
0 : RADEON_FLAG_READ_ONLY),
RADV_BO_PRIORITY_SHADER);
slab->ptr = (char*)device->ws->buffer_map(slab->bo);
list_inithead(&slab->shaders);

View File

@@ -399,7 +399,8 @@ cik_create_gfx_config(struct radv_device *device)
RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS|
RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_READ_ONLY);
RADEON_FLAG_READ_ONLY,
RADV_BO_PRIORITY_CS);
if (!device->gfx_init)
goto fail;

View File

@@ -302,7 +302,8 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
uint64_t size,
unsigned alignment,
enum radeon_bo_domain initial_domain,
unsigned flags)
unsigned flags,
unsigned priority)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
struct radv_amdgpu_winsys_bo *bo;
@@ -392,6 +393,7 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
bo->bo = buf_handle;
bo->initial_domain = initial_domain;
bo->is_shared = false;
bo->priority = priority;
if (initial_domain & RADEON_DOMAIN_VRAM)
p_atomic_add(&ws->allocated_vram,
@@ -460,7 +462,8 @@ radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
void *pointer,
uint64_t size)
uint64_t size,
unsigned priority)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
amdgpu_bo_handle buf_handle;
@@ -498,6 +501,7 @@ radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
bo->ws = ws;
bo->bo = buf_handle;
bo->initial_domain = RADEON_DOMAIN_GTT;
bo->priority = priority;
p_atomic_add(&ws->allocated_gtt,
align64(bo->size, ws->info.gart_page_size));
@@ -518,7 +522,8 @@ error:
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
int fd, unsigned *stride,
int fd, unsigned priority,
unsigned *stride,
unsigned *offset)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
@@ -565,6 +570,7 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
bo->size = result.alloc_size;
bo->is_shared = true;
bo->ws = ws;
bo->priority = priority;
bo->ref_count = 1;
if (bo->initial_domain & RADEON_DOMAIN_VRAM)

View File

@@ -45,6 +45,7 @@ struct radv_amdgpu_winsys_bo {
uint64_t size;
struct radv_amdgpu_winsys *ws;
bool is_virtual;
uint8_t priority;
int ref_count;
union {

View File

@@ -243,7 +243,8 @@ radv_amdgpu_cs_create(struct radeon_winsys *ws,
RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS |
RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_READ_ONLY);
RADEON_FLAG_READ_ONLY,
RADV_BO_PRIORITY_CS);
if (!cs->ib_buffer) {
free(cs);
return NULL;
@@ -358,7 +359,8 @@ static void radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, size_t min_size)
RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS |
RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_READ_ONLY);
RADEON_FLAG_READ_ONLY,
RADV_BO_PRIORITY_CS);
if (!cs->ib_buffer) {
cs->base.cdw = 0;
@@ -1016,7 +1018,8 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS |
RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_READ_ONLY);
RADEON_FLAG_READ_ONLY,
RADV_BO_PRIORITY_CS);
ptr = ws->buffer_map(bos[j]);
if (needs_preamble) {
@@ -1055,7 +1058,8 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS |
RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_READ_ONLY);
RADEON_FLAG_READ_ONLY,
RADV_BO_PRIORITY_CS);
ptr = ws->buffer_map(bos[0]);
if (preamble_cs) {
@@ -1249,8 +1253,9 @@ static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_w
assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
ctx->fence_bo = ws->base.buffer_create(&ws->base, 4096, 8,
RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS|
RADEON_FLAG_NO_INTERPROCESS_SHARING);
RADEON_FLAG_CPU_ACCESS |
RADEON_FLAG_NO_INTERPROCESS_SHARING,
RADV_BO_PRIORITY_CS);
if (ctx->fence_bo)
ctx->fence_map = (uint64_t*)ws->base.buffer_map(ctx->fence_bo);
if (ctx->fence_map)