anv: Split i915 code from anv_batch_chain.c
There is no change in behavior here. Signed-off-by: José Roberto de Souza <jose.souza@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Rohan Garg <rohan.garg@intel.com> Acked-by: Marcin Ślusarz <marcin.slusarz@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20428>
This commit is contained in:

committed by
Marge Bot

parent
94ca73b356
commit
94af444490
@@ -34,11 +34,11 @@
|
|||||||
|
|
||||||
#include "genxml/gen8_pack.h"
|
#include "genxml/gen8_pack.h"
|
||||||
#include "genxml/genX_bits.h"
|
#include "genxml/genX_bits.h"
|
||||||
#include "perf/intel_perf.h"
|
|
||||||
|
|
||||||
#include "util/u_debug.h"
|
|
||||||
#include "util/perf/u_trace.h"
|
#include "util/perf/u_trace.h"
|
||||||
|
|
||||||
|
#include "i915/anv_batch_chain.h"
|
||||||
|
|
||||||
/** \file anv_batch_chain.c
|
/** \file anv_batch_chain.c
|
||||||
*
|
*
|
||||||
* This file contains functions related to anv_cmd_buffer as a data
|
* This file contains functions related to anv_cmd_buffer as a data
|
||||||
@@ -1139,294 +1139,6 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
|
|||||||
&secondary->surface_relocs);
|
&secondary->surface_relocs);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct anv_execbuf {
|
|
||||||
struct drm_i915_gem_execbuffer2 execbuf;
|
|
||||||
|
|
||||||
struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
|
|
||||||
|
|
||||||
struct drm_i915_gem_exec_object2 * objects;
|
|
||||||
uint32_t bo_count;
|
|
||||||
struct anv_bo ** bos;
|
|
||||||
|
|
||||||
/* Allocated length of the 'objects' and 'bos' arrays */
|
|
||||||
uint32_t array_length;
|
|
||||||
|
|
||||||
uint32_t syncobj_count;
|
|
||||||
uint32_t syncobj_array_length;
|
|
||||||
struct drm_i915_gem_exec_fence * syncobjs;
|
|
||||||
uint64_t * syncobj_values;
|
|
||||||
|
|
||||||
uint32_t cmd_buffer_count;
|
|
||||||
struct anv_query_pool *perf_query_pool;
|
|
||||||
|
|
||||||
const VkAllocationCallbacks * alloc;
|
|
||||||
VkSystemAllocationScope alloc_scope;
|
|
||||||
|
|
||||||
int perf_query_pass;
|
|
||||||
};
|
|
||||||
|
|
||||||
static void
|
|
||||||
anv_execbuf_finish(struct anv_execbuf *exec)
|
|
||||||
{
|
|
||||||
vk_free(exec->alloc, exec->syncobjs);
|
|
||||||
vk_free(exec->alloc, exec->syncobj_values);
|
|
||||||
vk_free(exec->alloc, exec->objects);
|
|
||||||
vk_free(exec->alloc, exec->bos);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
anv_execbuf_add_ext(struct anv_execbuf *exec,
|
|
||||||
uint32_t ext_name,
|
|
||||||
struct i915_user_extension *ext)
|
|
||||||
{
|
|
||||||
__u64 *iter = &exec->execbuf.cliprects_ptr;
|
|
||||||
|
|
||||||
exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
|
|
||||||
|
|
||||||
while (*iter != 0) {
|
|
||||||
iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;
|
|
||||||
}
|
|
||||||
|
|
||||||
ext->name = ext_name;
|
|
||||||
|
|
||||||
*iter = (uintptr_t) ext;
|
|
||||||
}
|
|
||||||
|
|
||||||
static VkResult
|
|
||||||
anv_execbuf_add_bo_bitset(struct anv_device *device,
|
|
||||||
struct anv_execbuf *exec,
|
|
||||||
uint32_t dep_words,
|
|
||||||
BITSET_WORD *deps,
|
|
||||||
uint32_t extra_flags);
|
|
||||||
|
|
||||||
static VkResult
|
|
||||||
anv_execbuf_add_bo(struct anv_device *device,
|
|
||||||
struct anv_execbuf *exec,
|
|
||||||
struct anv_bo *bo,
|
|
||||||
struct anv_reloc_list *relocs,
|
|
||||||
uint32_t extra_flags)
|
|
||||||
{
|
|
||||||
struct drm_i915_gem_exec_object2 *obj = NULL;
|
|
||||||
|
|
||||||
if (bo->exec_obj_index < exec->bo_count &&
|
|
||||||
exec->bos[bo->exec_obj_index] == bo)
|
|
||||||
obj = &exec->objects[bo->exec_obj_index];
|
|
||||||
|
|
||||||
if (obj == NULL) {
|
|
||||||
/* We've never seen this one before. Add it to the list and assign
|
|
||||||
* an id that we can use later.
|
|
||||||
*/
|
|
||||||
if (exec->bo_count >= exec->array_length) {
|
|
||||||
uint32_t new_len = exec->objects ? exec->array_length * 2 : 64;
|
|
||||||
|
|
||||||
struct drm_i915_gem_exec_object2 *new_objects =
|
|
||||||
vk_alloc(exec->alloc, new_len * sizeof(*new_objects), 8, exec->alloc_scope);
|
|
||||||
if (new_objects == NULL)
|
|
||||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
||||||
|
|
||||||
struct anv_bo **new_bos =
|
|
||||||
vk_alloc(exec->alloc, new_len * sizeof(*new_bos), 8, exec->alloc_scope);
|
|
||||||
if (new_bos == NULL) {
|
|
||||||
vk_free(exec->alloc, new_objects);
|
|
||||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (exec->objects) {
|
|
||||||
memcpy(new_objects, exec->objects,
|
|
||||||
exec->bo_count * sizeof(*new_objects));
|
|
||||||
memcpy(new_bos, exec->bos,
|
|
||||||
exec->bo_count * sizeof(*new_bos));
|
|
||||||
}
|
|
||||||
|
|
||||||
vk_free(exec->alloc, exec->objects);
|
|
||||||
vk_free(exec->alloc, exec->bos);
|
|
||||||
|
|
||||||
exec->objects = new_objects;
|
|
||||||
exec->bos = new_bos;
|
|
||||||
exec->array_length = new_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(exec->bo_count < exec->array_length);
|
|
||||||
|
|
||||||
bo->exec_obj_index = exec->bo_count++;
|
|
||||||
obj = &exec->objects[bo->exec_obj_index];
|
|
||||||
exec->bos[bo->exec_obj_index] = bo;
|
|
||||||
|
|
||||||
obj->handle = bo->gem_handle;
|
|
||||||
obj->relocation_count = 0;
|
|
||||||
obj->relocs_ptr = 0;
|
|
||||||
obj->alignment = 0;
|
|
||||||
obj->offset = bo->offset;
|
|
||||||
obj->flags = bo->flags | extra_flags;
|
|
||||||
obj->rsvd1 = 0;
|
|
||||||
obj->rsvd2 = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (extra_flags & EXEC_OBJECT_WRITE) {
|
|
||||||
obj->flags |= EXEC_OBJECT_WRITE;
|
|
||||||
obj->flags &= ~EXEC_OBJECT_ASYNC;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (relocs != NULL) {
|
|
||||||
return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words,
|
|
||||||
relocs->deps, extra_flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
return VK_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add BO dependencies to execbuf */
|
|
||||||
static VkResult
|
|
||||||
anv_execbuf_add_bo_bitset(struct anv_device *device,
|
|
||||||
struct anv_execbuf *exec,
|
|
||||||
uint32_t dep_words,
|
|
||||||
BITSET_WORD *deps,
|
|
||||||
uint32_t extra_flags)
|
|
||||||
{
|
|
||||||
for (uint32_t w = 0; w < dep_words; w++) {
|
|
||||||
BITSET_WORD mask = deps[w];
|
|
||||||
while (mask) {
|
|
||||||
int i = u_bit_scan(&mask);
|
|
||||||
uint32_t gem_handle = w * BITSET_WORDBITS + i;
|
|
||||||
struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
|
|
||||||
assert(bo->refcount > 0);
|
|
||||||
VkResult result =
|
|
||||||
anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return VK_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static VkResult
|
|
||||||
anv_execbuf_add_syncobj(struct anv_device *device,
|
|
||||||
struct anv_execbuf *exec,
|
|
||||||
uint32_t syncobj,
|
|
||||||
uint32_t flags,
|
|
||||||
uint64_t timeline_value)
|
|
||||||
{
|
|
||||||
if (exec->syncobj_count >= exec->syncobj_array_length) {
|
|
||||||
uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16);
|
|
||||||
|
|
||||||
struct drm_i915_gem_exec_fence *new_syncobjs =
|
|
||||||
vk_alloc(exec->alloc, new_len * sizeof(*new_syncobjs),
|
|
||||||
8, exec->alloc_scope);
|
|
||||||
if (!new_syncobjs)
|
|
||||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
||||||
|
|
||||||
if (exec->syncobjs)
|
|
||||||
typed_memcpy(new_syncobjs, exec->syncobjs, exec->syncobj_count);
|
|
||||||
|
|
||||||
exec->syncobjs = new_syncobjs;
|
|
||||||
|
|
||||||
if (exec->syncobj_values) {
|
|
||||||
uint64_t *new_syncobj_values =
|
|
||||||
vk_alloc(exec->alloc, new_len * sizeof(*new_syncobj_values),
|
|
||||||
8, exec->alloc_scope);
|
|
||||||
if (!new_syncobj_values)
|
|
||||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
||||||
|
|
||||||
typed_memcpy(new_syncobj_values, exec->syncobj_values,
|
|
||||||
exec->syncobj_count);
|
|
||||||
|
|
||||||
exec->syncobj_values = new_syncobj_values;
|
|
||||||
}
|
|
||||||
|
|
||||||
exec->syncobj_array_length = new_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (timeline_value && !exec->syncobj_values) {
|
|
||||||
exec->syncobj_values =
|
|
||||||
vk_zalloc(exec->alloc, exec->syncobj_array_length *
|
|
||||||
sizeof(*exec->syncobj_values),
|
|
||||||
8, exec->alloc_scope);
|
|
||||||
if (!exec->syncobj_values)
|
|
||||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
||||||
}
|
|
||||||
|
|
||||||
exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) {
|
|
||||||
.handle = syncobj,
|
|
||||||
.flags = flags,
|
|
||||||
};
|
|
||||||
if (timeline_value)
|
|
||||||
exec->syncobj_values[exec->syncobj_count] = timeline_value;
|
|
||||||
|
|
||||||
exec->syncobj_count++;
|
|
||||||
|
|
||||||
return VK_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static VkResult
|
|
||||||
anv_execbuf_add_sync(struct anv_device *device,
|
|
||||||
struct anv_execbuf *execbuf,
|
|
||||||
struct vk_sync *sync,
|
|
||||||
bool is_signal,
|
|
||||||
uint64_t value)
|
|
||||||
{
|
|
||||||
/* It's illegal to signal a timeline with value 0 because that's never
|
|
||||||
* higher than the current value. A timeline wait on value 0 is always
|
|
||||||
* trivial because 0 <= uint64_t always.
|
|
||||||
*/
|
|
||||||
if ((sync->flags & VK_SYNC_IS_TIMELINE) && value == 0)
|
|
||||||
return VK_SUCCESS;
|
|
||||||
|
|
||||||
if (vk_sync_is_anv_bo_sync(sync)) {
|
|
||||||
struct anv_bo_sync *bo_sync =
|
|
||||||
container_of(sync, struct anv_bo_sync, sync);
|
|
||||||
|
|
||||||
assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET));
|
|
||||||
|
|
||||||
return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL,
|
|
||||||
is_signal ? EXEC_OBJECT_WRITE : 0);
|
|
||||||
} else if (vk_sync_type_is_drm_syncobj(sync->type)) {
|
|
||||||
struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);
|
|
||||||
|
|
||||||
if (!(sync->flags & VK_SYNC_IS_TIMELINE))
|
|
||||||
value = 0;
|
|
||||||
|
|
||||||
return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj,
|
|
||||||
is_signal ? I915_EXEC_FENCE_SIGNAL :
|
|
||||||
I915_EXEC_FENCE_WAIT,
|
|
||||||
value);
|
|
||||||
}
|
|
||||||
|
|
||||||
unreachable("Invalid sync type");
|
|
||||||
}
|
|
||||||
|
|
||||||
static VkResult
|
|
||||||
setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
|
|
||||||
struct anv_cmd_buffer *cmd_buffer)
|
|
||||||
{
|
|
||||||
VkResult result;
|
|
||||||
/* Add surface dependencies (BOs) to the execbuf */
|
|
||||||
anv_execbuf_add_bo_bitset(cmd_buffer->device, execbuf,
|
|
||||||
cmd_buffer->surface_relocs.dep_words,
|
|
||||||
cmd_buffer->surface_relocs.deps, 0);
|
|
||||||
|
|
||||||
/* First, we walk over all of the bos we've seen and add them and their
|
|
||||||
* relocations to the validate list.
|
|
||||||
*/
|
|
||||||
struct anv_batch_bo **bbo;
|
|
||||||
u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
|
|
||||||
result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
|
|
||||||
(*bbo)->bo, &(*bbo)->relocs, 0);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct anv_bo **bo_entry;
|
|
||||||
u_vector_foreach(bo_entry, &cmd_buffer->dynamic_bos) {
|
|
||||||
result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
|
|
||||||
*bo_entry, NULL, 0);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
return VK_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers,
|
anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers,
|
||||||
uint32_t num_cmd_buffers)
|
uint32_t num_cmd_buffers)
|
||||||
@@ -1444,245 +1156,6 @@ anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers,
|
|||||||
anv_cmd_buffer_record_end_submit(cmd_buffers[num_cmd_buffers - 1]);
|
anv_cmd_buffer_record_end_submit(cmd_buffers[num_cmd_buffers - 1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static VkResult
|
|
||||||
pin_state_pool(struct anv_device *device,
|
|
||||||
struct anv_execbuf *execbuf,
|
|
||||||
struct anv_state_pool *pool)
|
|
||||||
{
|
|
||||||
anv_block_pool_foreach_bo(bo, &pool->block_pool) {
|
|
||||||
VkResult result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
return VK_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static VkResult
|
|
||||||
setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
|
|
||||||
struct anv_queue *queue,
|
|
||||||
struct anv_cmd_buffer **cmd_buffers,
|
|
||||||
uint32_t num_cmd_buffers)
|
|
||||||
{
|
|
||||||
struct anv_device *device = queue->device;
|
|
||||||
VkResult result;
|
|
||||||
|
|
||||||
/* Edit the tail of the command buffers to chain them all together if they
|
|
||||||
* can be.
|
|
||||||
*/
|
|
||||||
anv_cmd_buffer_chain_command_buffers(cmd_buffers, num_cmd_buffers);
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < num_cmd_buffers; i++) {
|
|
||||||
anv_measure_submit(cmd_buffers[i]);
|
|
||||||
result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffers[i]);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add all the global BOs to the object list for softpin case. */
|
|
||||||
result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
result = pin_state_pool(device, execbuf, &device->dynamic_state_pool);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
result = pin_state_pool(device, execbuf, &device->general_state_pool);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
result = pin_state_pool(device, execbuf, &device->instruction_state_pool);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
result = pin_state_pool(device, execbuf, &device->binding_table_pool);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
/* Add the BOs for all user allocated memory objects because we can't
|
|
||||||
* track after binding updates of VK_EXT_descriptor_indexing.
|
|
||||||
*/
|
|
||||||
list_for_each_entry(struct anv_device_memory, mem,
|
|
||||||
&device->memory_objects, link) {
|
|
||||||
result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < execbuf->bo_count; i++)
|
|
||||||
execbuf->objects[i].offset = execbuf->bos[i]->offset;
|
|
||||||
|
|
||||||
struct anv_batch_bo *first_batch_bo =
|
|
||||||
list_first_entry(&cmd_buffers[0]->batch_bos, struct anv_batch_bo, link);
|
|
||||||
|
|
||||||
/* The kernel requires that the last entry in the validation list be the
|
|
||||||
* batch buffer to execute. We can simply swap the element
|
|
||||||
* corresponding to the first batch_bo in the chain with the last
|
|
||||||
* element in the list.
|
|
||||||
*/
|
|
||||||
if (first_batch_bo->bo->exec_obj_index != execbuf->bo_count - 1) {
|
|
||||||
uint32_t idx = first_batch_bo->bo->exec_obj_index;
|
|
||||||
uint32_t last_idx = execbuf->bo_count - 1;
|
|
||||||
|
|
||||||
struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
|
|
||||||
assert(execbuf->bos[idx] == first_batch_bo->bo);
|
|
||||||
|
|
||||||
execbuf->objects[idx] = execbuf->objects[last_idx];
|
|
||||||
execbuf->bos[idx] = execbuf->bos[last_idx];
|
|
||||||
execbuf->bos[idx]->exec_obj_index = idx;
|
|
||||||
|
|
||||||
execbuf->objects[last_idx] = tmp_obj;
|
|
||||||
execbuf->bos[last_idx] = first_batch_bo->bo;
|
|
||||||
first_batch_bo->bo->exec_obj_index = last_idx;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
|
|
||||||
if (device->physical->memory.need_clflush) {
|
|
||||||
__builtin_ia32_mfence();
|
|
||||||
struct anv_batch_bo **bbo;
|
|
||||||
for (uint32_t i = 0; i < num_cmd_buffers; i++) {
|
|
||||||
u_vector_foreach(bbo, &cmd_buffers[i]->seen_bbos) {
|
|
||||||
for (uint32_t l = 0; l < (*bbo)->length; l += CACHELINE_SIZE)
|
|
||||||
__builtin_ia32_clflush((*bbo)->bo->map + l);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
|
|
||||||
.buffers_ptr = (uintptr_t) execbuf->objects,
|
|
||||||
.buffer_count = execbuf->bo_count,
|
|
||||||
.batch_start_offset = 0,
|
|
||||||
/* We'll fill in batch length later when chaining batches. */
|
|
||||||
.batch_len = 0,
|
|
||||||
.cliprects_ptr = 0,
|
|
||||||
.num_cliprects = 0,
|
|
||||||
.DR1 = 0,
|
|
||||||
.DR4 = 0,
|
|
||||||
.flags = I915_EXEC_NO_RELOC |
|
|
||||||
I915_EXEC_HANDLE_LUT |
|
|
||||||
queue->exec_flags,
|
|
||||||
.rsvd1 = device->context_id,
|
|
||||||
.rsvd2 = 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
return VK_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static VkResult
|
|
||||||
setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue)
|
|
||||||
{
|
|
||||||
struct anv_device *device = queue->device;
|
|
||||||
VkResult result = anv_execbuf_add_bo(device, execbuf,
|
|
||||||
device->trivial_batch_bo,
|
|
||||||
NULL, 0);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
|
|
||||||
.buffers_ptr = (uintptr_t) execbuf->objects,
|
|
||||||
.buffer_count = execbuf->bo_count,
|
|
||||||
.batch_start_offset = 0,
|
|
||||||
.batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */
|
|
||||||
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
|
|
||||||
.rsvd1 = device->context_id,
|
|
||||||
.rsvd2 = 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
return VK_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static VkResult
|
|
||||||
setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
|
|
||||||
struct anv_utrace_flush_copy *flush)
|
|
||||||
{
|
|
||||||
struct anv_device *device = queue->device;
|
|
||||||
VkResult result = anv_execbuf_add_bo(device, execbuf,
|
|
||||||
flush->batch_bo,
|
|
||||||
&flush->relocs, 0);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
result = anv_execbuf_add_sync(device, execbuf, flush->sync,
|
|
||||||
true /* is_signal */, 0 /* value */);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
if (flush->batch_bo->exec_obj_index != execbuf->bo_count - 1) {
|
|
||||||
uint32_t idx = flush->batch_bo->exec_obj_index;
|
|
||||||
uint32_t last_idx = execbuf->bo_count - 1;
|
|
||||||
|
|
||||||
struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
|
|
||||||
assert(execbuf->bos[idx] == flush->batch_bo);
|
|
||||||
|
|
||||||
execbuf->objects[idx] = execbuf->objects[last_idx];
|
|
||||||
execbuf->bos[idx] = execbuf->bos[last_idx];
|
|
||||||
execbuf->bos[idx]->exec_obj_index = idx;
|
|
||||||
|
|
||||||
execbuf->objects[last_idx] = tmp_obj;
|
|
||||||
execbuf->bos[last_idx] = flush->batch_bo;
|
|
||||||
flush->batch_bo->exec_obj_index = last_idx;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
|
|
||||||
if (device->physical->memory.need_clflush)
|
|
||||||
intel_flush_range(flush->batch_bo->map, flush->batch_bo->size);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
|
|
||||||
.buffers_ptr = (uintptr_t) execbuf->objects,
|
|
||||||
.buffer_count = execbuf->bo_count,
|
|
||||||
.batch_start_offset = 0,
|
|
||||||
.batch_len = flush->batch.next - flush->batch.start,
|
|
||||||
.flags = I915_EXEC_NO_RELOC |
|
|
||||||
I915_EXEC_HANDLE_LUT |
|
|
||||||
I915_EXEC_FENCE_ARRAY |
|
|
||||||
queue->exec_flags,
|
|
||||||
.rsvd1 = device->context_id,
|
|
||||||
.rsvd2 = 0,
|
|
||||||
.num_cliprects = execbuf->syncobj_count,
|
|
||||||
.cliprects_ptr = (uintptr_t)execbuf->syncobjs,
|
|
||||||
};
|
|
||||||
|
|
||||||
return VK_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static VkResult
|
|
||||||
anv_queue_exec_utrace_locked(struct anv_queue *queue,
|
|
||||||
struct anv_utrace_flush_copy *flush)
|
|
||||||
{
|
|
||||||
assert(flush->batch_bo);
|
|
||||||
|
|
||||||
struct anv_device *device = queue->device;
|
|
||||||
struct anv_execbuf execbuf = {
|
|
||||||
.alloc = &device->vk.alloc,
|
|
||||||
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
|
|
||||||
};
|
|
||||||
|
|
||||||
VkResult result = setup_utrace_execbuf(&execbuf, queue, flush);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto error;
|
|
||||||
|
|
||||||
int ret = queue->device->info->no_hw ? 0 :
|
|
||||||
anv_gem_execbuffer(queue->device, &execbuf.execbuf);
|
|
||||||
if (ret)
|
|
||||||
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
|
|
||||||
|
|
||||||
error:
|
|
||||||
anv_execbuf_finish(&execbuf);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
|
anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
|
||||||
uint32_t cmd_buffer_count,
|
uint32_t cmd_buffer_count,
|
||||||
@@ -1752,181 +1225,10 @@ anv_queue_exec_locked(struct anv_queue *queue,
|
|||||||
struct anv_query_pool *perf_query_pool,
|
struct anv_query_pool *perf_query_pool,
|
||||||
uint32_t perf_query_pass)
|
uint32_t perf_query_pass)
|
||||||
{
|
{
|
||||||
struct anv_device *device = queue->device;
|
return anv_i915_queue_exec_locked(queue, wait_count, waits,
|
||||||
struct anv_utrace_flush_copy *utrace_flush_data = NULL;
|
cmd_buffer_count, cmd_buffers,
|
||||||
struct anv_execbuf execbuf = {
|
signal_count, signals,
|
||||||
.alloc = &queue->device->vk.alloc,
|
perf_query_pool, perf_query_pass);
|
||||||
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
|
|
||||||
.perf_query_pass = perf_query_pass,
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Flush the trace points first, they need to be moved */
|
|
||||||
VkResult result =
|
|
||||||
anv_device_utrace_flush_cmd_buffers(queue,
|
|
||||||
cmd_buffer_count,
|
|
||||||
cmd_buffers,
|
|
||||||
&utrace_flush_data);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto error;
|
|
||||||
|
|
||||||
if (utrace_flush_data && !utrace_flush_data->batch_bo) {
|
|
||||||
result = anv_execbuf_add_sync(device, &execbuf,
|
|
||||||
utrace_flush_data->sync,
|
|
||||||
true /* is_signal */,
|
|
||||||
0);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto error;
|
|
||||||
|
|
||||||
utrace_flush_data = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Always add the workaround BO as it includes a driver identifier for the
|
|
||||||
* error_state.
|
|
||||||
*/
|
|
||||||
result =
|
|
||||||
anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto error;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < wait_count; i++) {
|
|
||||||
result = anv_execbuf_add_sync(device, &execbuf,
|
|
||||||
waits[i].sync,
|
|
||||||
false /* is_signal */,
|
|
||||||
waits[i].wait_value);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < signal_count; i++) {
|
|
||||||
result = anv_execbuf_add_sync(device, &execbuf,
|
|
||||||
signals[i].sync,
|
|
||||||
true /* is_signal */,
|
|
||||||
signals[i].signal_value);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (queue->sync) {
|
|
||||||
result = anv_execbuf_add_sync(device, &execbuf,
|
|
||||||
queue->sync,
|
|
||||||
true /* is_signal */,
|
|
||||||
0 /* signal_value */);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cmd_buffer_count) {
|
|
||||||
result = setup_execbuf_for_cmd_buffers(&execbuf, queue,
|
|
||||||
cmd_buffers,
|
|
||||||
cmd_buffer_count);
|
|
||||||
} else {
|
|
||||||
result = setup_empty_execbuf(&execbuf, queue);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
goto error;
|
|
||||||
|
|
||||||
const bool has_perf_query =
|
|
||||||
perf_query_pool && perf_query_pass >= 0 && cmd_buffer_count;
|
|
||||||
|
|
||||||
if (INTEL_DEBUG(DEBUG_SUBMIT)) {
|
|
||||||
uint32_t total_size_kb = 0;
|
|
||||||
for (uint32_t i = 0; i < execbuf.bo_count; i++) {
|
|
||||||
const struct anv_bo *bo = execbuf.bos[i];
|
|
||||||
total_size_kb += bo->size / 1024;
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0 (%.1fMb aperture)\n",
|
|
||||||
execbuf.execbuf.batch_start_offset, execbuf.execbuf.batch_len,
|
|
||||||
(float)total_size_kb / 1024.0f);
|
|
||||||
for (uint32_t i = 0; i < execbuf.bo_count; i++) {
|
|
||||||
const struct anv_bo *bo = execbuf.bos[i];
|
|
||||||
uint64_t size = bo->size + bo->_ccs_size;
|
|
||||||
|
|
||||||
fprintf(stderr, " BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64
|
|
||||||
"KB handle=%05u capture=%u name=%s\n",
|
|
||||||
bo->offset, bo->offset + size - 1, size / 1024, bo->gem_handle,
|
|
||||||
(bo->flags & EXEC_OBJECT_CAPTURE) != 0, bo->name);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
|
|
||||||
perf_query_pool, perf_query_pass);
|
|
||||||
|
|
||||||
if (execbuf.syncobj_values) {
|
|
||||||
execbuf.timeline_fences.fence_count = execbuf.syncobj_count;
|
|
||||||
execbuf.timeline_fences.handles_ptr = (uintptr_t)execbuf.syncobjs;
|
|
||||||
execbuf.timeline_fences.values_ptr = (uintptr_t)execbuf.syncobj_values;
|
|
||||||
anv_execbuf_add_ext(&execbuf,
|
|
||||||
DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
|
|
||||||
&execbuf.timeline_fences.base);
|
|
||||||
} else if (execbuf.syncobjs) {
|
|
||||||
execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
|
|
||||||
execbuf.execbuf.num_cliprects = execbuf.syncobj_count;
|
|
||||||
execbuf.execbuf.cliprects_ptr = (uintptr_t)execbuf.syncobjs;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (has_perf_query) {
|
|
||||||
assert(perf_query_pass < perf_query_pool->n_passes);
|
|
||||||
struct intel_perf_query_info *query_info =
|
|
||||||
perf_query_pool->pass_query[perf_query_pass];
|
|
||||||
|
|
||||||
/* Some performance queries just use the pipeline statistic HW, no need for
|
|
||||||
* OA in that case, so no need to reconfigure.
|
|
||||||
*/
|
|
||||||
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) &&
|
|
||||||
(query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
|
|
||||||
query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
|
|
||||||
int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
|
|
||||||
(void *)(uintptr_t) query_info->oa_metrics_set_id);
|
|
||||||
if (ret < 0) {
|
|
||||||
result = vk_device_set_lost(&device->vk,
|
|
||||||
"i915-perf config failed: %s",
|
|
||||||
strerror(errno));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct anv_bo *pass_batch_bo = perf_query_pool->bo;
|
|
||||||
|
|
||||||
struct drm_i915_gem_exec_object2 query_pass_object = {
|
|
||||||
.handle = pass_batch_bo->gem_handle,
|
|
||||||
.offset = pass_batch_bo->offset,
|
|
||||||
.flags = pass_batch_bo->flags,
|
|
||||||
};
|
|
||||||
struct drm_i915_gem_execbuffer2 query_pass_execbuf = {
|
|
||||||
.buffers_ptr = (uintptr_t) &query_pass_object,
|
|
||||||
.buffer_count = 1,
|
|
||||||
.batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool,
|
|
||||||
perf_query_pass),
|
|
||||||
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags,
|
|
||||||
.rsvd1 = device->context_id,
|
|
||||||
};
|
|
||||||
|
|
||||||
int ret = queue->device->info->no_hw ? 0 :
|
|
||||||
anv_gem_execbuffer(queue->device, &query_pass_execbuf);
|
|
||||||
if (ret)
|
|
||||||
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
|
|
||||||
}
|
|
||||||
|
|
||||||
int ret = queue->device->info->no_hw ? 0 :
|
|
||||||
anv_gem_execbuffer(queue->device, &execbuf.execbuf);
|
|
||||||
if (ret)
|
|
||||||
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
|
|
||||||
|
|
||||||
if (result == VK_SUCCESS && queue->sync) {
|
|
||||||
result = vk_sync_wait(&device->vk, queue->sync, 0,
|
|
||||||
VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
result = vk_queue_set_lost(&queue->vk, "sync wait failed");
|
|
||||||
}
|
|
||||||
|
|
||||||
error:
|
|
||||||
anv_execbuf_finish(&execbuf);
|
|
||||||
|
|
||||||
if (result == VK_SUCCESS && utrace_flush_data)
|
|
||||||
result = anv_queue_exec_utrace_locked(queue, utrace_flush_data);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
@@ -2051,46 +1353,6 @@ anv_queue_submit(struct vk_queue *vk_queue,
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static VkResult
|
|
||||||
anv_i915_execute_simple_batch(struct anv_queue *queue,
|
|
||||||
struct anv_bo *batch_bo,
|
|
||||||
uint32_t batch_bo_size)
|
|
||||||
{
|
|
||||||
struct anv_device *device = queue->device;
|
|
||||||
struct anv_execbuf execbuf = {
|
|
||||||
.alloc = &queue->device->vk.alloc,
|
|
||||||
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
|
|
||||||
};
|
|
||||||
|
|
||||||
VkResult result = anv_execbuf_add_bo(device, &execbuf, batch_bo, NULL, 0);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
|
|
||||||
.buffers_ptr = (uintptr_t) execbuf.objects,
|
|
||||||
.buffer_count = execbuf.bo_count,
|
|
||||||
.batch_start_offset = 0,
|
|
||||||
.batch_len = batch_bo_size,
|
|
||||||
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
|
|
||||||
.rsvd1 = device->context_id,
|
|
||||||
.rsvd2 = 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
if (anv_gem_execbuffer(device, &execbuf.execbuf)) {
|
|
||||||
result = vk_device_set_lost(&device->vk, "anv_gem_execbuffer failed: %m");
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
result = anv_device_wait(device, batch_bo, INT64_MAX);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
result = vk_device_set_lost(&device->vk,
|
|
||||||
"anv_device_wait failed: %m");
|
|
||||||
|
|
||||||
fail:
|
|
||||||
anv_execbuf_finish(&execbuf);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
anv_queue_submit_simple_batch(struct anv_queue *queue,
|
anv_queue_submit_simple_batch(struct anv_queue *queue,
|
||||||
struct anv_batch *batch)
|
struct anv_batch *batch)
|
||||||
|
@@ -216,16 +216,6 @@ anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
|
||||||
anv_gem_execbuffer(struct anv_device *device,
|
|
||||||
struct drm_i915_gem_execbuffer2 *execbuf)
|
|
||||||
{
|
|
||||||
if (execbuf->flags & I915_EXEC_FENCE_OUT)
|
|
||||||
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf);
|
|
||||||
else
|
|
||||||
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Return -1 on error. */
|
/** Return -1 on error. */
|
||||||
int
|
int
|
||||||
anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle)
|
anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle)
|
||||||
|
@@ -91,13 +91,6 @@ anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Stub implementation: performs no submission and always reports success. */
int
anv_gem_execbuffer(struct anv_device *device,
                   struct drm_i915_gem_execbuffer2 *execbuf)
{
   (void) device;
   (void) execbuf;

   return 0;
}
|
|
||||||
|
|
||||||
int
|
int
|
||||||
anv_gem_set_tiling(struct anv_device *device,
|
anv_gem_set_tiling(struct anv_device *device,
|
||||||
uint32_t gem_handle, uint32_t stride, uint32_t tiling)
|
uint32_t gem_handle, uint32_t stride, uint32_t tiling)
|
||||||
|
@@ -1348,8 +1348,6 @@ uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
|
|||||||
struct drm_i915_gem_memory_class_instance *regions);
|
struct drm_i915_gem_memory_class_instance *regions);
|
||||||
uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
|
uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
|
||||||
int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
|
int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
|
||||||
int anv_gem_execbuffer(struct anv_device *device,
|
|
||||||
struct drm_i915_gem_execbuffer2 *execbuf);
|
|
||||||
int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
|
int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
|
||||||
uint32_t stride, uint32_t tiling);
|
uint32_t stride, uint32_t tiling);
|
||||||
int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
|
int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
|
||||||
|
796
src/intel/vulkan/i915/anv_batch_chain.c
Normal file
796
src/intel/vulkan/i915/anv_batch_chain.c
Normal file
@@ -0,0 +1,796 @@
|
|||||||
|
/*
|
||||||
|
* Copyright © 2022 Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "i915/anv_batch_chain.h"
|
||||||
|
#include "anv_private.h"
|
||||||
|
#include "anv_measure.h"
|
||||||
|
|
||||||
|
#include "perf/intel_perf.h"
|
||||||
|
#include "util/u_debug.h"
|
||||||
|
|
||||||
|
#include "drm-uapi/i915_drm.h"
|
||||||
|
|
||||||
|
struct anv_execbuf {
|
||||||
|
struct drm_i915_gem_execbuffer2 execbuf;
|
||||||
|
|
||||||
|
struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
|
||||||
|
|
||||||
|
struct drm_i915_gem_exec_object2 * objects;
|
||||||
|
uint32_t bo_count;
|
||||||
|
struct anv_bo ** bos;
|
||||||
|
|
||||||
|
/* Allocated length of the 'objects' and 'bos' arrays */
|
||||||
|
uint32_t array_length;
|
||||||
|
|
||||||
|
uint32_t syncobj_count;
|
||||||
|
uint32_t syncobj_array_length;
|
||||||
|
struct drm_i915_gem_exec_fence * syncobjs;
|
||||||
|
uint64_t * syncobj_values;
|
||||||
|
|
||||||
|
uint32_t cmd_buffer_count;
|
||||||
|
struct anv_query_pool *perf_query_pool;
|
||||||
|
|
||||||
|
const VkAllocationCallbacks * alloc;
|
||||||
|
VkSystemAllocationScope alloc_scope;
|
||||||
|
|
||||||
|
int perf_query_pass;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
anv_execbuf_finish(struct anv_execbuf *exec)
|
||||||
|
{
|
||||||
|
vk_free(exec->alloc, exec->syncobjs);
|
||||||
|
vk_free(exec->alloc, exec->syncobj_values);
|
||||||
|
vk_free(exec->alloc, exec->objects);
|
||||||
|
vk_free(exec->alloc, exec->bos);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
anv_execbuf_add_ext(struct anv_execbuf *exec,
|
||||||
|
uint32_t ext_name,
|
||||||
|
struct i915_user_extension *ext)
|
||||||
|
{
|
||||||
|
__u64 *iter = &exec->execbuf.cliprects_ptr;
|
||||||
|
|
||||||
|
exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
|
||||||
|
|
||||||
|
while (*iter != 0) {
|
||||||
|
iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;
|
||||||
|
}
|
||||||
|
|
||||||
|
ext->name = ext_name;
|
||||||
|
|
||||||
|
*iter = (uintptr_t) ext;
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
anv_execbuf_add_bo_bitset(struct anv_device *device,
|
||||||
|
struct anv_execbuf *exec,
|
||||||
|
uint32_t dep_words,
|
||||||
|
BITSET_WORD *deps,
|
||||||
|
uint32_t extra_flags);
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
anv_execbuf_add_bo(struct anv_device *device,
|
||||||
|
struct anv_execbuf *exec,
|
||||||
|
struct anv_bo *bo,
|
||||||
|
struct anv_reloc_list *relocs,
|
||||||
|
uint32_t extra_flags)
|
||||||
|
{
|
||||||
|
struct drm_i915_gem_exec_object2 *obj = NULL;
|
||||||
|
|
||||||
|
if (bo->exec_obj_index < exec->bo_count &&
|
||||||
|
exec->bos[bo->exec_obj_index] == bo)
|
||||||
|
obj = &exec->objects[bo->exec_obj_index];
|
||||||
|
|
||||||
|
if (obj == NULL) {
|
||||||
|
/* We've never seen this one before. Add it to the list and assign
|
||||||
|
* an id that we can use later.
|
||||||
|
*/
|
||||||
|
if (exec->bo_count >= exec->array_length) {
|
||||||
|
uint32_t new_len = exec->objects ? exec->array_length * 2 : 64;
|
||||||
|
|
||||||
|
struct drm_i915_gem_exec_object2 *new_objects =
|
||||||
|
vk_alloc(exec->alloc, new_len * sizeof(*new_objects), 8, exec->alloc_scope);
|
||||||
|
if (new_objects == NULL)
|
||||||
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
|
|
||||||
|
struct anv_bo **new_bos =
|
||||||
|
vk_alloc(exec->alloc, new_len * sizeof(*new_bos), 8, exec->alloc_scope);
|
||||||
|
if (new_bos == NULL) {
|
||||||
|
vk_free(exec->alloc, new_objects);
|
||||||
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (exec->objects) {
|
||||||
|
memcpy(new_objects, exec->objects,
|
||||||
|
exec->bo_count * sizeof(*new_objects));
|
||||||
|
memcpy(new_bos, exec->bos,
|
||||||
|
exec->bo_count * sizeof(*new_bos));
|
||||||
|
}
|
||||||
|
|
||||||
|
vk_free(exec->alloc, exec->objects);
|
||||||
|
vk_free(exec->alloc, exec->bos);
|
||||||
|
|
||||||
|
exec->objects = new_objects;
|
||||||
|
exec->bos = new_bos;
|
||||||
|
exec->array_length = new_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(exec->bo_count < exec->array_length);
|
||||||
|
|
||||||
|
bo->exec_obj_index = exec->bo_count++;
|
||||||
|
obj = &exec->objects[bo->exec_obj_index];
|
||||||
|
exec->bos[bo->exec_obj_index] = bo;
|
||||||
|
|
||||||
|
obj->handle = bo->gem_handle;
|
||||||
|
obj->relocation_count = 0;
|
||||||
|
obj->relocs_ptr = 0;
|
||||||
|
obj->alignment = 0;
|
||||||
|
obj->offset = bo->offset;
|
||||||
|
obj->flags = bo->flags | extra_flags;
|
||||||
|
obj->rsvd1 = 0;
|
||||||
|
obj->rsvd2 = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (extra_flags & EXEC_OBJECT_WRITE) {
|
||||||
|
obj->flags |= EXEC_OBJECT_WRITE;
|
||||||
|
obj->flags &= ~EXEC_OBJECT_ASYNC;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (relocs != NULL) {
|
||||||
|
return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words,
|
||||||
|
relocs->deps, extra_flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add BO dependencies to execbuf */
|
||||||
|
static VkResult
|
||||||
|
anv_execbuf_add_bo_bitset(struct anv_device *device,
|
||||||
|
struct anv_execbuf *exec,
|
||||||
|
uint32_t dep_words,
|
||||||
|
BITSET_WORD *deps,
|
||||||
|
uint32_t extra_flags)
|
||||||
|
{
|
||||||
|
for (uint32_t w = 0; w < dep_words; w++) {
|
||||||
|
BITSET_WORD mask = deps[w];
|
||||||
|
while (mask) {
|
||||||
|
int i = u_bit_scan(&mask);
|
||||||
|
uint32_t gem_handle = w * BITSET_WORDBITS + i;
|
||||||
|
struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
|
||||||
|
assert(bo->refcount > 0);
|
||||||
|
VkResult result =
|
||||||
|
anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
anv_execbuf_add_syncobj(struct anv_device *device,
|
||||||
|
struct anv_execbuf *exec,
|
||||||
|
uint32_t syncobj,
|
||||||
|
uint32_t flags,
|
||||||
|
uint64_t timeline_value)
|
||||||
|
{
|
||||||
|
if (exec->syncobj_count >= exec->syncobj_array_length) {
|
||||||
|
uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16);
|
||||||
|
|
||||||
|
struct drm_i915_gem_exec_fence *new_syncobjs =
|
||||||
|
vk_alloc(exec->alloc, new_len * sizeof(*new_syncobjs),
|
||||||
|
8, exec->alloc_scope);
|
||||||
|
if (!new_syncobjs)
|
||||||
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
|
|
||||||
|
if (exec->syncobjs)
|
||||||
|
typed_memcpy(new_syncobjs, exec->syncobjs, exec->syncobj_count);
|
||||||
|
|
||||||
|
exec->syncobjs = new_syncobjs;
|
||||||
|
|
||||||
|
if (exec->syncobj_values) {
|
||||||
|
uint64_t *new_syncobj_values =
|
||||||
|
vk_alloc(exec->alloc, new_len * sizeof(*new_syncobj_values),
|
||||||
|
8, exec->alloc_scope);
|
||||||
|
if (!new_syncobj_values)
|
||||||
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
|
|
||||||
|
typed_memcpy(new_syncobj_values, exec->syncobj_values,
|
||||||
|
exec->syncobj_count);
|
||||||
|
|
||||||
|
exec->syncobj_values = new_syncobj_values;
|
||||||
|
}
|
||||||
|
|
||||||
|
exec->syncobj_array_length = new_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (timeline_value && !exec->syncobj_values) {
|
||||||
|
exec->syncobj_values =
|
||||||
|
vk_zalloc(exec->alloc, exec->syncobj_array_length *
|
||||||
|
sizeof(*exec->syncobj_values),
|
||||||
|
8, exec->alloc_scope);
|
||||||
|
if (!exec->syncobj_values)
|
||||||
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
|
}
|
||||||
|
|
||||||
|
exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) {
|
||||||
|
.handle = syncobj,
|
||||||
|
.flags = flags,
|
||||||
|
};
|
||||||
|
if (timeline_value)
|
||||||
|
exec->syncobj_values[exec->syncobj_count] = timeline_value;
|
||||||
|
|
||||||
|
exec->syncobj_count++;
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
anv_execbuf_add_sync(struct anv_device *device,
|
||||||
|
struct anv_execbuf *execbuf,
|
||||||
|
struct vk_sync *sync,
|
||||||
|
bool is_signal,
|
||||||
|
uint64_t value)
|
||||||
|
{
|
||||||
|
/* It's illegal to signal a timeline with value 0 because that's never
|
||||||
|
* higher than the current value. A timeline wait on value 0 is always
|
||||||
|
* trivial because 0 <= uint64_t always.
|
||||||
|
*/
|
||||||
|
if ((sync->flags & VK_SYNC_IS_TIMELINE) && value == 0)
|
||||||
|
return VK_SUCCESS;
|
||||||
|
|
||||||
|
if (vk_sync_is_anv_bo_sync(sync)) {
|
||||||
|
struct anv_bo_sync *bo_sync =
|
||||||
|
container_of(sync, struct anv_bo_sync, sync);
|
||||||
|
|
||||||
|
assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET));
|
||||||
|
|
||||||
|
return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL,
|
||||||
|
is_signal ? EXEC_OBJECT_WRITE : 0);
|
||||||
|
} else if (vk_sync_type_is_drm_syncobj(sync->type)) {
|
||||||
|
struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);
|
||||||
|
|
||||||
|
if (!(sync->flags & VK_SYNC_IS_TIMELINE))
|
||||||
|
value = 0;
|
||||||
|
|
||||||
|
return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj,
|
||||||
|
is_signal ? I915_EXEC_FENCE_SIGNAL :
|
||||||
|
I915_EXEC_FENCE_WAIT,
|
||||||
|
value);
|
||||||
|
}
|
||||||
|
|
||||||
|
unreachable("Invalid sync type");
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
|
||||||
|
struct anv_cmd_buffer *cmd_buffer)
|
||||||
|
{
|
||||||
|
VkResult result;
|
||||||
|
/* Add surface dependencies (BOs) to the execbuf */
|
||||||
|
anv_execbuf_add_bo_bitset(cmd_buffer->device, execbuf,
|
||||||
|
cmd_buffer->surface_relocs.dep_words,
|
||||||
|
cmd_buffer->surface_relocs.deps, 0);
|
||||||
|
|
||||||
|
/* First, we walk over all of the bos we've seen and add them and their
|
||||||
|
* relocations to the validate list.
|
||||||
|
*/
|
||||||
|
struct anv_batch_bo **bbo;
|
||||||
|
u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
|
||||||
|
result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
|
||||||
|
(*bbo)->bo, &(*bbo)->relocs, 0);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct anv_bo **bo_entry;
|
||||||
|
u_vector_foreach(bo_entry, &cmd_buffer->dynamic_bos) {
|
||||||
|
result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
|
||||||
|
*bo_entry, NULL, 0);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
pin_state_pool(struct anv_device *device,
|
||||||
|
struct anv_execbuf *execbuf,
|
||||||
|
struct anv_state_pool *pool)
|
||||||
|
{
|
||||||
|
anv_block_pool_foreach_bo(bo, &pool->block_pool) {
|
||||||
|
VkResult result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
|
||||||
|
struct anv_queue *queue,
|
||||||
|
struct anv_cmd_buffer **cmd_buffers,
|
||||||
|
uint32_t num_cmd_buffers)
|
||||||
|
{
|
||||||
|
struct anv_device *device = queue->device;
|
||||||
|
VkResult result;
|
||||||
|
|
||||||
|
/* Edit the tail of the command buffers to chain them all together if they
|
||||||
|
* can be.
|
||||||
|
*/
|
||||||
|
anv_cmd_buffer_chain_command_buffers(cmd_buffers, num_cmd_buffers);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < num_cmd_buffers; i++) {
|
||||||
|
anv_measure_submit(cmd_buffers[i]);
|
||||||
|
result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffers[i]);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add all the global BOs to the object list for softpin case. */
|
||||||
|
result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
result = pin_state_pool(device, execbuf, &device->dynamic_state_pool);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
result = pin_state_pool(device, execbuf, &device->general_state_pool);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
result = pin_state_pool(device, execbuf, &device->instruction_state_pool);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
result = pin_state_pool(device, execbuf, &device->binding_table_pool);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
/* Add the BOs for all user allocated memory objects because we can't
|
||||||
|
* track after binding updates of VK_EXT_descriptor_indexing.
|
||||||
|
*/
|
||||||
|
list_for_each_entry(struct anv_device_memory, mem,
|
||||||
|
&device->memory_objects, link) {
|
||||||
|
result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < execbuf->bo_count; i++)
|
||||||
|
execbuf->objects[i].offset = execbuf->bos[i]->offset;
|
||||||
|
|
||||||
|
struct anv_batch_bo *first_batch_bo =
|
||||||
|
list_first_entry(&cmd_buffers[0]->batch_bos, struct anv_batch_bo, link);
|
||||||
|
|
||||||
|
/* The kernel requires that the last entry in the validation list be the
|
||||||
|
* batch buffer to execute. We can simply swap the element
|
||||||
|
* corresponding to the first batch_bo in the chain with the last
|
||||||
|
* element in the list.
|
||||||
|
*/
|
||||||
|
if (first_batch_bo->bo->exec_obj_index != execbuf->bo_count - 1) {
|
||||||
|
uint32_t idx = first_batch_bo->bo->exec_obj_index;
|
||||||
|
uint32_t last_idx = execbuf->bo_count - 1;
|
||||||
|
|
||||||
|
struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
|
||||||
|
assert(execbuf->bos[idx] == first_batch_bo->bo);
|
||||||
|
|
||||||
|
execbuf->objects[idx] = execbuf->objects[last_idx];
|
||||||
|
execbuf->bos[idx] = execbuf->bos[last_idx];
|
||||||
|
execbuf->bos[idx]->exec_obj_index = idx;
|
||||||
|
|
||||||
|
execbuf->objects[last_idx] = tmp_obj;
|
||||||
|
execbuf->bos[last_idx] = first_batch_bo->bo;
|
||||||
|
first_batch_bo->bo->exec_obj_index = last_idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
|
||||||
|
if (device->physical->memory.need_clflush) {
|
||||||
|
__builtin_ia32_mfence();
|
||||||
|
struct anv_batch_bo **bbo;
|
||||||
|
for (uint32_t i = 0; i < num_cmd_buffers; i++) {
|
||||||
|
u_vector_foreach(bbo, &cmd_buffers[i]->seen_bbos) {
|
||||||
|
for (uint32_t l = 0; l < (*bbo)->length; l += CACHELINE_SIZE)
|
||||||
|
__builtin_ia32_clflush((*bbo)->bo->map + l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
|
||||||
|
.buffers_ptr = (uintptr_t) execbuf->objects,
|
||||||
|
.buffer_count = execbuf->bo_count,
|
||||||
|
.batch_start_offset = 0,
|
||||||
|
/* We'll fill in batch length later when chaining batches. */
|
||||||
|
.batch_len = 0,
|
||||||
|
.cliprects_ptr = 0,
|
||||||
|
.num_cliprects = 0,
|
||||||
|
.DR1 = 0,
|
||||||
|
.DR4 = 0,
|
||||||
|
.flags = I915_EXEC_NO_RELOC |
|
||||||
|
I915_EXEC_HANDLE_LUT |
|
||||||
|
queue->exec_flags,
|
||||||
|
.rsvd1 = device->context_id,
|
||||||
|
.rsvd2 = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue)
|
||||||
|
{
|
||||||
|
struct anv_device *device = queue->device;
|
||||||
|
VkResult result = anv_execbuf_add_bo(device, execbuf,
|
||||||
|
device->trivial_batch_bo,
|
||||||
|
NULL, 0);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
|
||||||
|
.buffers_ptr = (uintptr_t) execbuf->objects,
|
||||||
|
.buffer_count = execbuf->bo_count,
|
||||||
|
.batch_start_offset = 0,
|
||||||
|
.batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */
|
||||||
|
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
|
||||||
|
.rsvd1 = device->context_id,
|
||||||
|
.rsvd2 = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
|
||||||
|
struct anv_utrace_flush_copy *flush)
|
||||||
|
{
|
||||||
|
struct anv_device *device = queue->device;
|
||||||
|
VkResult result = anv_execbuf_add_bo(device, execbuf,
|
||||||
|
flush->batch_bo,
|
||||||
|
&flush->relocs, 0);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
result = anv_execbuf_add_sync(device, execbuf, flush->sync,
|
||||||
|
true /* is_signal */, 0 /* value */);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
if (flush->batch_bo->exec_obj_index != execbuf->bo_count - 1) {
|
||||||
|
uint32_t idx = flush->batch_bo->exec_obj_index;
|
||||||
|
uint32_t last_idx = execbuf->bo_count - 1;
|
||||||
|
|
||||||
|
struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
|
||||||
|
assert(execbuf->bos[idx] == flush->batch_bo);
|
||||||
|
|
||||||
|
execbuf->objects[idx] = execbuf->objects[last_idx];
|
||||||
|
execbuf->bos[idx] = execbuf->bos[last_idx];
|
||||||
|
execbuf->bos[idx]->exec_obj_index = idx;
|
||||||
|
|
||||||
|
execbuf->objects[last_idx] = tmp_obj;
|
||||||
|
execbuf->bos[last_idx] = flush->batch_bo;
|
||||||
|
flush->batch_bo->exec_obj_index = last_idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
|
||||||
|
if (device->physical->memory.need_clflush)
|
||||||
|
intel_flush_range(flush->batch_bo->map, flush->batch_bo->size);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
|
||||||
|
.buffers_ptr = (uintptr_t) execbuf->objects,
|
||||||
|
.buffer_count = execbuf->bo_count,
|
||||||
|
.batch_start_offset = 0,
|
||||||
|
.batch_len = flush->batch.next - flush->batch.start,
|
||||||
|
.flags = I915_EXEC_NO_RELOC |
|
||||||
|
I915_EXEC_HANDLE_LUT |
|
||||||
|
I915_EXEC_FENCE_ARRAY |
|
||||||
|
queue->exec_flags,
|
||||||
|
.rsvd1 = device->context_id,
|
||||||
|
.rsvd2 = 0,
|
||||||
|
.num_cliprects = execbuf->syncobj_count,
|
||||||
|
.cliprects_ptr = (uintptr_t)execbuf->syncobjs,
|
||||||
|
};
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
anv_gem_execbuffer(struct anv_device *device,
|
||||||
|
struct drm_i915_gem_execbuffer2 *execbuf)
|
||||||
|
{
|
||||||
|
if (execbuf->flags & I915_EXEC_FENCE_OUT)
|
||||||
|
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf);
|
||||||
|
else
|
||||||
|
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
anv_queue_exec_utrace_locked(struct anv_queue *queue,
|
||||||
|
struct anv_utrace_flush_copy *flush)
|
||||||
|
{
|
||||||
|
assert(flush->batch_bo);
|
||||||
|
|
||||||
|
struct anv_device *device = queue->device;
|
||||||
|
struct anv_execbuf execbuf = {
|
||||||
|
.alloc = &device->vk.alloc,
|
||||||
|
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
|
||||||
|
};
|
||||||
|
|
||||||
|
VkResult result = setup_utrace_execbuf(&execbuf, queue, flush);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
int ret = queue->device->info->no_hw ? 0 :
|
||||||
|
anv_gem_execbuffer(queue->device, &execbuf.execbuf);
|
||||||
|
if (ret)
|
||||||
|
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
|
||||||
|
|
||||||
|
error:
|
||||||
|
anv_execbuf_finish(&execbuf);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
VkResult
|
||||||
|
anv_i915_queue_exec_locked(struct anv_queue *queue,
|
||||||
|
uint32_t wait_count,
|
||||||
|
const struct vk_sync_wait *waits,
|
||||||
|
uint32_t cmd_buffer_count,
|
||||||
|
struct anv_cmd_buffer **cmd_buffers,
|
||||||
|
uint32_t signal_count,
|
||||||
|
const struct vk_sync_signal *signals,
|
||||||
|
struct anv_query_pool *perf_query_pool,
|
||||||
|
uint32_t perf_query_pass)
|
||||||
|
{
|
||||||
|
struct anv_device *device = queue->device;
|
||||||
|
struct anv_utrace_flush_copy *utrace_flush_data = NULL;
|
||||||
|
struct anv_execbuf execbuf = {
|
||||||
|
.alloc = &queue->device->vk.alloc,
|
||||||
|
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
|
||||||
|
.perf_query_pass = perf_query_pass,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Flush the trace points first, they need to be moved */
|
||||||
|
VkResult result =
|
||||||
|
anv_device_utrace_flush_cmd_buffers(queue,
|
||||||
|
cmd_buffer_count,
|
||||||
|
cmd_buffers,
|
||||||
|
&utrace_flush_data);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
if (utrace_flush_data && !utrace_flush_data->batch_bo) {
|
||||||
|
result = anv_execbuf_add_sync(device, &execbuf,
|
||||||
|
utrace_flush_data->sync,
|
||||||
|
true /* is_signal */,
|
||||||
|
0);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
utrace_flush_data = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Always add the workaround BO as it includes a driver identifier for the
|
||||||
|
* error_state.
|
||||||
|
*/
|
||||||
|
result =
|
||||||
|
anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < wait_count; i++) {
|
||||||
|
result = anv_execbuf_add_sync(device, &execbuf,
|
||||||
|
waits[i].sync,
|
||||||
|
false /* is_signal */,
|
||||||
|
waits[i].wait_value);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < signal_count; i++) {
|
||||||
|
result = anv_execbuf_add_sync(device, &execbuf,
|
||||||
|
signals[i].sync,
|
||||||
|
true /* is_signal */,
|
||||||
|
signals[i].signal_value);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (queue->sync) {
|
||||||
|
result = anv_execbuf_add_sync(device, &execbuf,
|
||||||
|
queue->sync,
|
||||||
|
true /* is_signal */,
|
||||||
|
0 /* signal_value */);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cmd_buffer_count) {
|
||||||
|
result = setup_execbuf_for_cmd_buffers(&execbuf, queue,
|
||||||
|
cmd_buffers,
|
||||||
|
cmd_buffer_count);
|
||||||
|
} else {
|
||||||
|
result = setup_empty_execbuf(&execbuf, queue);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
const bool has_perf_query =
|
||||||
|
perf_query_pool && perf_query_pass >= 0 && cmd_buffer_count;
|
||||||
|
|
||||||
|
if (INTEL_DEBUG(DEBUG_SUBMIT)) {
|
||||||
|
uint32_t total_size_kb = 0;
|
||||||
|
for (uint32_t i = 0; i < execbuf.bo_count; i++) {
|
||||||
|
const struct anv_bo *bo = execbuf.bos[i];
|
||||||
|
total_size_kb += bo->size / 1024;
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0 (%.1fMb aperture)\n",
|
||||||
|
execbuf.execbuf.batch_start_offset, execbuf.execbuf.batch_len,
|
||||||
|
(float)total_size_kb / 1024.0f);
|
||||||
|
for (uint32_t i = 0; i < execbuf.bo_count; i++) {
|
||||||
|
const struct anv_bo *bo = execbuf.bos[i];
|
||||||
|
uint64_t size = bo->size + bo->_ccs_size;
|
||||||
|
|
||||||
|
fprintf(stderr, " BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64
|
||||||
|
"KB handle=%05u capture=%u name=%s\n",
|
||||||
|
bo->offset, bo->offset + size - 1, size / 1024, bo->gem_handle,
|
||||||
|
(bo->flags & EXEC_OBJECT_CAPTURE) != 0, bo->name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
|
||||||
|
perf_query_pool, perf_query_pass);
|
||||||
|
|
||||||
|
if (execbuf.syncobj_values) {
|
||||||
|
execbuf.timeline_fences.fence_count = execbuf.syncobj_count;
|
||||||
|
execbuf.timeline_fences.handles_ptr = (uintptr_t)execbuf.syncobjs;
|
||||||
|
execbuf.timeline_fences.values_ptr = (uintptr_t)execbuf.syncobj_values;
|
||||||
|
anv_execbuf_add_ext(&execbuf,
|
||||||
|
DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
|
||||||
|
&execbuf.timeline_fences.base);
|
||||||
|
} else if (execbuf.syncobjs) {
|
||||||
|
execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
|
||||||
|
execbuf.execbuf.num_cliprects = execbuf.syncobj_count;
|
||||||
|
execbuf.execbuf.cliprects_ptr = (uintptr_t)execbuf.syncobjs;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (has_perf_query) {
|
||||||
|
assert(perf_query_pass < perf_query_pool->n_passes);
|
||||||
|
struct intel_perf_query_info *query_info =
|
||||||
|
perf_query_pool->pass_query[perf_query_pass];
|
||||||
|
|
||||||
|
/* Some performance queries just the pipeline statistic HW, no need for
|
||||||
|
* OA in that case, so no need to reconfigure.
|
||||||
|
*/
|
||||||
|
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) &&
|
||||||
|
(query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
|
||||||
|
query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
|
||||||
|
int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
|
||||||
|
(void *)(uintptr_t) query_info->oa_metrics_set_id);
|
||||||
|
if (ret < 0) {
|
||||||
|
result = vk_device_set_lost(&device->vk,
|
||||||
|
"i915-perf config failed: %s",
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct anv_bo *pass_batch_bo = perf_query_pool->bo;
|
||||||
|
|
||||||
|
struct drm_i915_gem_exec_object2 query_pass_object = {
|
||||||
|
.handle = pass_batch_bo->gem_handle,
|
||||||
|
.offset = pass_batch_bo->offset,
|
||||||
|
.flags = pass_batch_bo->flags,
|
||||||
|
};
|
||||||
|
struct drm_i915_gem_execbuffer2 query_pass_execbuf = {
|
||||||
|
.buffers_ptr = (uintptr_t) &query_pass_object,
|
||||||
|
.buffer_count = 1,
|
||||||
|
.batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool,
|
||||||
|
perf_query_pass),
|
||||||
|
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags,
|
||||||
|
.rsvd1 = device->context_id,
|
||||||
|
};
|
||||||
|
|
||||||
|
int ret = queue->device->info->no_hw ? 0 :
|
||||||
|
anv_gem_execbuffer(queue->device, &query_pass_execbuf);
|
||||||
|
if (ret)
|
||||||
|
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
|
||||||
|
}
|
||||||
|
|
||||||
|
int ret = queue->device->info->no_hw ? 0 :
|
||||||
|
anv_gem_execbuffer(queue->device, &execbuf.execbuf);
|
||||||
|
if (ret)
|
||||||
|
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
|
||||||
|
|
||||||
|
if (result == VK_SUCCESS && queue->sync) {
|
||||||
|
result = vk_sync_wait(&device->vk, queue->sync, 0,
|
||||||
|
VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
result = vk_queue_set_lost(&queue->vk, "sync wait failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
error:
|
||||||
|
anv_execbuf_finish(&execbuf);
|
||||||
|
|
||||||
|
if (result == VK_SUCCESS && utrace_flush_data)
|
||||||
|
result = anv_queue_exec_utrace_locked(queue, utrace_flush_data);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
VkResult
|
||||||
|
anv_i915_execute_simple_batch(struct anv_queue *queue,
|
||||||
|
struct anv_bo *batch_bo,
|
||||||
|
uint32_t batch_bo_size)
|
||||||
|
{
|
||||||
|
struct anv_device *device = queue->device;
|
||||||
|
struct anv_execbuf execbuf = {
|
||||||
|
.alloc = &queue->device->vk.alloc,
|
||||||
|
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
|
||||||
|
};
|
||||||
|
|
||||||
|
VkResult result = anv_execbuf_add_bo(device, &execbuf, batch_bo, NULL, 0);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
|
||||||
|
.buffers_ptr = (uintptr_t) execbuf.objects,
|
||||||
|
.buffer_count = execbuf.bo_count,
|
||||||
|
.batch_start_offset = 0,
|
||||||
|
.batch_len = batch_bo_size,
|
||||||
|
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
|
||||||
|
.rsvd1 = device->context_id,
|
||||||
|
.rsvd2 = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (anv_gem_execbuffer(device, &execbuf.execbuf)) {
|
||||||
|
result = vk_device_set_lost(&device->vk, "anv_gem_execbuffer failed: %m");
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = anv_device_wait(device, batch_bo, INT64_MAX);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
result = vk_device_set_lost(&device->vk,
|
||||||
|
"anv_device_wait failed: %m");
|
||||||
|
|
||||||
|
fail:
|
||||||
|
anv_execbuf_finish(&execbuf);
|
||||||
|
return result;
|
||||||
|
}
|
49
src/intel/vulkan/i915/anv_batch_chain.h
Normal file
49
src/intel/vulkan/i915/anv_batch_chain.h
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
/*
|
||||||
|
* Copyright © 2022 Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "vulkan/vulkan_core.h"
|
||||||
|
|
||||||
|
#include "vk_sync.h"
|
||||||
|
|
||||||
|
struct anv_queue;
|
||||||
|
struct anv_bo;
|
||||||
|
struct anv_cmd_buffer;
|
||||||
|
struct anv_query_pool;
|
||||||
|
|
||||||
|
/* Submits batch_bo (batch_bo_size bytes of commands) on queue through the
 * i915 execbuffer path and waits for it to finish before returning.
 */
VkResult
anv_i915_execute_simple_batch(struct anv_queue *queue,
                              struct anv_bo *batch_bo,
                              uint32_t batch_bo_size);
|
||||||
|
VkResult
|
||||||
|
anv_i915_queue_exec_locked(struct anv_queue *queue,
|
||||||
|
uint32_t wait_count,
|
||||||
|
const struct vk_sync_wait *waits,
|
||||||
|
uint32_t cmd_buffer_count,
|
||||||
|
struct anv_cmd_buffer **cmd_buffers,
|
||||||
|
uint32_t signal_count,
|
||||||
|
const struct vk_sync_signal *signals,
|
||||||
|
struct anv_query_pool *perf_query_pool,
|
||||||
|
uint32_t perf_query_pass);
|
@@ -132,6 +132,8 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']],
|
|||||||
endforeach
|
endforeach
|
||||||
|
|
||||||
libanv_files = files(
|
libanv_files = files(
|
||||||
|
'i915/anv_batch_chain.c',
|
||||||
|
'i915/anv_batch_chain.h',
|
||||||
'i915/anv_device.c',
|
'i915/anv_device.c',
|
||||||
'i915/anv_device.h',
|
'i915/anv_device.h',
|
||||||
'layers/anv_doom64.c',
|
'layers/anv_doom64.c',
|
||||||
|
Reference in New Issue
Block a user