anv: Split i915 code from anv_batch_chain.c

There is no change in behavior here.

Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Rohan Garg <rohan.garg@intel.com>
Acked-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20428>
Author: José Roberto de Souza
Date: 2022-09-02 06:55:22 -07:00
Committed by: Marge Bot
Parent: 94ca73b356
Commit: 94af444490
7 changed files with 853 additions and 763 deletions

src/intel/vulkan/anv_batch_chain.c

@@ -34,11 +34,11 @@
#include "genxml/gen8_pack.h"
#include "genxml/genX_bits.h"
#include "perf/intel_perf.h"
#include "util/u_debug.h"
#include "util/perf/u_trace.h"
#include "i915/anv_batch_chain.h"
/** \file anv_batch_chain.c
*
* This file contains functions related to anv_cmd_buffer as a data
@@ -1139,294 +1139,6 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
&secondary->surface_relocs);
}
struct anv_execbuf {
struct drm_i915_gem_execbuffer2 execbuf;
struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
struct drm_i915_gem_exec_object2 * objects;
uint32_t bo_count;
struct anv_bo ** bos;
/* Allocated length of the 'objects' and 'bos' arrays */
uint32_t array_length;
uint32_t syncobj_count;
uint32_t syncobj_array_length;
struct drm_i915_gem_exec_fence * syncobjs;
uint64_t * syncobj_values;
uint32_t cmd_buffer_count;
struct anv_query_pool *perf_query_pool;
const VkAllocationCallbacks * alloc;
VkSystemAllocationScope alloc_scope;
int perf_query_pass;
};
static void
anv_execbuf_finish(struct anv_execbuf *exec)
{
vk_free(exec->alloc, exec->syncobjs);
vk_free(exec->alloc, exec->syncobj_values);
vk_free(exec->alloc, exec->objects);
vk_free(exec->alloc, exec->bos);
}
static void
anv_execbuf_add_ext(struct anv_execbuf *exec,
uint32_t ext_name,
struct i915_user_extension *ext)
{
__u64 *iter = &exec->execbuf.cliprects_ptr;
exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
while (*iter != 0) {
iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;
}
ext->name = ext_name;
*iter = (uintptr_t) ext;
}
static VkResult
anv_execbuf_add_bo_bitset(struct anv_device *device,
struct anv_execbuf *exec,
uint32_t dep_words,
BITSET_WORD *deps,
uint32_t extra_flags);
static VkResult
anv_execbuf_add_bo(struct anv_device *device,
struct anv_execbuf *exec,
struct anv_bo *bo,
struct anv_reloc_list *relocs,
uint32_t extra_flags)
{
struct drm_i915_gem_exec_object2 *obj = NULL;
if (bo->exec_obj_index < exec->bo_count &&
exec->bos[bo->exec_obj_index] == bo)
obj = &exec->objects[bo->exec_obj_index];
if (obj == NULL) {
/* We've never seen this one before. Add it to the list and assign
* an id that we can use later.
*/
if (exec->bo_count >= exec->array_length) {
uint32_t new_len = exec->objects ? exec->array_length * 2 : 64;
struct drm_i915_gem_exec_object2 *new_objects =
vk_alloc(exec->alloc, new_len * sizeof(*new_objects), 8, exec->alloc_scope);
if (new_objects == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
struct anv_bo **new_bos =
vk_alloc(exec->alloc, new_len * sizeof(*new_bos), 8, exec->alloc_scope);
if (new_bos == NULL) {
vk_free(exec->alloc, new_objects);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
if (exec->objects) {
memcpy(new_objects, exec->objects,
exec->bo_count * sizeof(*new_objects));
memcpy(new_bos, exec->bos,
exec->bo_count * sizeof(*new_bos));
}
vk_free(exec->alloc, exec->objects);
vk_free(exec->alloc, exec->bos);
exec->objects = new_objects;
exec->bos = new_bos;
exec->array_length = new_len;
}
assert(exec->bo_count < exec->array_length);
bo->exec_obj_index = exec->bo_count++;
obj = &exec->objects[bo->exec_obj_index];
exec->bos[bo->exec_obj_index] = bo;
obj->handle = bo->gem_handle;
obj->relocation_count = 0;
obj->relocs_ptr = 0;
obj->alignment = 0;
obj->offset = bo->offset;
obj->flags = bo->flags | extra_flags;
obj->rsvd1 = 0;
obj->rsvd2 = 0;
}
if (extra_flags & EXEC_OBJECT_WRITE) {
obj->flags |= EXEC_OBJECT_WRITE;
obj->flags &= ~EXEC_OBJECT_ASYNC;
}
if (relocs != NULL) {
return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words,
relocs->deps, extra_flags);
}
return VK_SUCCESS;
}
/* Add BO dependencies to execbuf */
static VkResult
anv_execbuf_add_bo_bitset(struct anv_device *device,
struct anv_execbuf *exec,
uint32_t dep_words,
BITSET_WORD *deps,
uint32_t extra_flags)
{
for (uint32_t w = 0; w < dep_words; w++) {
BITSET_WORD mask = deps[w];
while (mask) {
int i = u_bit_scan(&mask);
uint32_t gem_handle = w * BITSET_WORDBITS + i;
struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
assert(bo->refcount > 0);
VkResult result =
anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags);
if (result != VK_SUCCESS)
return result;
}
}
return VK_SUCCESS;
}
static VkResult
anv_execbuf_add_syncobj(struct anv_device *device,
struct anv_execbuf *exec,
uint32_t syncobj,
uint32_t flags,
uint64_t timeline_value)
{
if (exec->syncobj_count >= exec->syncobj_array_length) {
uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16);
struct drm_i915_gem_exec_fence *new_syncobjs =
vk_alloc(exec->alloc, new_len * sizeof(*new_syncobjs),
8, exec->alloc_scope);
if (!new_syncobjs)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (exec->syncobjs)
typed_memcpy(new_syncobjs, exec->syncobjs, exec->syncobj_count);
exec->syncobjs = new_syncobjs;
if (exec->syncobj_values) {
uint64_t *new_syncobj_values =
vk_alloc(exec->alloc, new_len * sizeof(*new_syncobj_values),
8, exec->alloc_scope);
if (!new_syncobj_values)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
typed_memcpy(new_syncobj_values, exec->syncobj_values,
exec->syncobj_count);
exec->syncobj_values = new_syncobj_values;
}
exec->syncobj_array_length = new_len;
}
if (timeline_value && !exec->syncobj_values) {
exec->syncobj_values =
vk_zalloc(exec->alloc, exec->syncobj_array_length *
sizeof(*exec->syncobj_values),
8, exec->alloc_scope);
if (!exec->syncobj_values)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) {
.handle = syncobj,
.flags = flags,
};
if (timeline_value)
exec->syncobj_values[exec->syncobj_count] = timeline_value;
exec->syncobj_count++;
return VK_SUCCESS;
}
static VkResult
anv_execbuf_add_sync(struct anv_device *device,
struct anv_execbuf *execbuf,
struct vk_sync *sync,
bool is_signal,
uint64_t value)
{
/* It's illegal to signal a timeline with value 0 because that's never
* higher than the current value. A timeline wait on value 0 is always
* trivial because 0 <= uint64_t always.
*/
if ((sync->flags & VK_SYNC_IS_TIMELINE) && value == 0)
return VK_SUCCESS;
if (vk_sync_is_anv_bo_sync(sync)) {
struct anv_bo_sync *bo_sync =
container_of(sync, struct anv_bo_sync, sync);
assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET));
return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL,
is_signal ? EXEC_OBJECT_WRITE : 0);
} else if (vk_sync_type_is_drm_syncobj(sync->type)) {
struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);
if (!(sync->flags & VK_SYNC_IS_TIMELINE))
value = 0;
return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj,
is_signal ? I915_EXEC_FENCE_SIGNAL :
I915_EXEC_FENCE_WAIT,
value);
}
unreachable("Invalid sync type");
}
static VkResult
setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
struct anv_cmd_buffer *cmd_buffer)
{
VkResult result;
/* Add surface dependencies (BOs) to the execbuf */
anv_execbuf_add_bo_bitset(cmd_buffer->device, execbuf,
cmd_buffer->surface_relocs.dep_words,
cmd_buffer->surface_relocs.deps, 0);
/* First, we walk over all of the bos we've seen and add them and their
* relocations to the validate list.
*/
struct anv_batch_bo **bbo;
u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
(*bbo)->bo, &(*bbo)->relocs, 0);
if (result != VK_SUCCESS)
return result;
}
struct anv_bo **bo_entry;
u_vector_foreach(bo_entry, &cmd_buffer->dynamic_bos) {
result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
*bo_entry, NULL, 0);
if (result != VK_SUCCESS)
return result;
}
return VK_SUCCESS;
}
void
anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers,
uint32_t num_cmd_buffers)
@@ -1444,245 +1156,6 @@ anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers,
anv_cmd_buffer_record_end_submit(cmd_buffers[num_cmd_buffers - 1]);
}
static VkResult
pin_state_pool(struct anv_device *device,
struct anv_execbuf *execbuf,
struct anv_state_pool *pool)
{
anv_block_pool_foreach_bo(bo, &pool->block_pool) {
VkResult result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0);
if (result != VK_SUCCESS)
return result;
}
return VK_SUCCESS;
}
static VkResult
setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
struct anv_queue *queue,
struct anv_cmd_buffer **cmd_buffers,
uint32_t num_cmd_buffers)
{
struct anv_device *device = queue->device;
VkResult result;
/* Edit the tail of the command buffers to chain them all together if they
* can be.
*/
anv_cmd_buffer_chain_command_buffers(cmd_buffers, num_cmd_buffers);
for (uint32_t i = 0; i < num_cmd_buffers; i++) {
anv_measure_submit(cmd_buffers[i]);
result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffers[i]);
if (result != VK_SUCCESS)
return result;
}
/* Add all the global BOs to the object list for softpin case. */
result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool);
if (result != VK_SUCCESS)
return result;
result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool);
if (result != VK_SUCCESS)
return result;
result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool);
if (result != VK_SUCCESS)
return result;
result = pin_state_pool(device, execbuf, &device->dynamic_state_pool);
if (result != VK_SUCCESS)
return result;
result = pin_state_pool(device, execbuf, &device->general_state_pool);
if (result != VK_SUCCESS)
return result;
result = pin_state_pool(device, execbuf, &device->instruction_state_pool);
if (result != VK_SUCCESS)
return result;
result = pin_state_pool(device, execbuf, &device->binding_table_pool);
if (result != VK_SUCCESS)
return result;
/* Add the BOs for all user-allocated memory objects because we can't
* track them after binding updates with VK_EXT_descriptor_indexing.
*/
list_for_each_entry(struct anv_device_memory, mem,
&device->memory_objects, link) {
result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0);
if (result != VK_SUCCESS)
return result;
}
for (uint32_t i = 0; i < execbuf->bo_count; i++)
execbuf->objects[i].offset = execbuf->bos[i]->offset;
struct anv_batch_bo *first_batch_bo =
list_first_entry(&cmd_buffers[0]->batch_bos, struct anv_batch_bo, link);
/* The kernel requires that the last entry in the validation list be the
* batch buffer to execute. We can simply swap the element
* corresponding to the first batch_bo in the chain with the last
* element in the list.
*/
if (first_batch_bo->bo->exec_obj_index != execbuf->bo_count - 1) {
uint32_t idx = first_batch_bo->bo->exec_obj_index;
uint32_t last_idx = execbuf->bo_count - 1;
struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
assert(execbuf->bos[idx] == first_batch_bo->bo);
execbuf->objects[idx] = execbuf->objects[last_idx];
execbuf->bos[idx] = execbuf->bos[last_idx];
execbuf->bos[idx]->exec_obj_index = idx;
execbuf->objects[last_idx] = tmp_obj;
execbuf->bos[last_idx] = first_batch_bo->bo;
first_batch_bo->bo->exec_obj_index = last_idx;
}
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (device->physical->memory.need_clflush) {
__builtin_ia32_mfence();
struct anv_batch_bo **bbo;
for (uint32_t i = 0; i < num_cmd_buffers; i++) {
u_vector_foreach(bbo, &cmd_buffers[i]->seen_bbos) {
for (uint32_t l = 0; l < (*bbo)->length; l += CACHELINE_SIZE)
__builtin_ia32_clflush((*bbo)->bo->map + l);
}
}
}
#endif
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf->objects,
.buffer_count = execbuf->bo_count,
.batch_start_offset = 0,
/* We'll fill in batch length later when chaining batches. */
.batch_len = 0,
.cliprects_ptr = 0,
.num_cliprects = 0,
.DR1 = 0,
.DR4 = 0,
.flags = I915_EXEC_NO_RELOC |
I915_EXEC_HANDLE_LUT |
queue->exec_flags,
.rsvd1 = device->context_id,
.rsvd2 = 0,
};
return VK_SUCCESS;
}
static VkResult
setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue)
{
struct anv_device *device = queue->device;
VkResult result = anv_execbuf_add_bo(device, execbuf,
device->trivial_batch_bo,
NULL, 0);
if (result != VK_SUCCESS)
return result;
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf->objects,
.buffer_count = execbuf->bo_count,
.batch_start_offset = 0,
.batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
.rsvd1 = device->context_id,
.rsvd2 = 0,
};
return VK_SUCCESS;
}
static VkResult
setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
struct anv_utrace_flush_copy *flush)
{
struct anv_device *device = queue->device;
VkResult result = anv_execbuf_add_bo(device, execbuf,
flush->batch_bo,
&flush->relocs, 0);
if (result != VK_SUCCESS)
return result;
result = anv_execbuf_add_sync(device, execbuf, flush->sync,
true /* is_signal */, 0 /* value */);
if (result != VK_SUCCESS)
return result;
if (flush->batch_bo->exec_obj_index != execbuf->bo_count - 1) {
uint32_t idx = flush->batch_bo->exec_obj_index;
uint32_t last_idx = execbuf->bo_count - 1;
struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
assert(execbuf->bos[idx] == flush->batch_bo);
execbuf->objects[idx] = execbuf->objects[last_idx];
execbuf->bos[idx] = execbuf->bos[last_idx];
execbuf->bos[idx]->exec_obj_index = idx;
execbuf->objects[last_idx] = tmp_obj;
execbuf->bos[last_idx] = flush->batch_bo;
flush->batch_bo->exec_obj_index = last_idx;
}
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (device->physical->memory.need_clflush)
intel_flush_range(flush->batch_bo->map, flush->batch_bo->size);
#endif
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf->objects,
.buffer_count = execbuf->bo_count,
.batch_start_offset = 0,
.batch_len = flush->batch.next - flush->batch.start,
.flags = I915_EXEC_NO_RELOC |
I915_EXEC_HANDLE_LUT |
I915_EXEC_FENCE_ARRAY |
queue->exec_flags,
.rsvd1 = device->context_id,
.rsvd2 = 0,
.num_cliprects = execbuf->syncobj_count,
.cliprects_ptr = (uintptr_t)execbuf->syncobjs,
};
return VK_SUCCESS;
}
static VkResult
anv_queue_exec_utrace_locked(struct anv_queue *queue,
struct anv_utrace_flush_copy *flush)
{
assert(flush->batch_bo);
struct anv_device *device = queue->device;
struct anv_execbuf execbuf = {
.alloc = &device->vk.alloc,
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
};
VkResult result = setup_utrace_execbuf(&execbuf, queue, flush);
if (result != VK_SUCCESS)
goto error;
int ret = queue->device->info->no_hw ? 0 :
anv_gem_execbuffer(queue->device, &execbuf.execbuf);
if (ret)
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
error:
anv_execbuf_finish(&execbuf);
return result;
}
void
anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
uint32_t cmd_buffer_count,
@@ -1752,181 +1225,10 @@ anv_queue_exec_locked(struct anv_queue *queue,
struct anv_query_pool *perf_query_pool,
uint32_t perf_query_pass)
{
struct anv_device *device = queue->device;
struct anv_utrace_flush_copy *utrace_flush_data = NULL;
struct anv_execbuf execbuf = {
.alloc = &queue->device->vk.alloc,
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
.perf_query_pass = perf_query_pass,
};
/* Flush the trace points first; they need to be moved */
VkResult result =
anv_device_utrace_flush_cmd_buffers(queue,
cmd_buffer_count,
cmd_buffers,
&utrace_flush_data);
if (result != VK_SUCCESS)
goto error;
if (utrace_flush_data && !utrace_flush_data->batch_bo) {
result = anv_execbuf_add_sync(device, &execbuf,
utrace_flush_data->sync,
true /* is_signal */,
0);
if (result != VK_SUCCESS)
goto error;
utrace_flush_data = NULL;
}
/* Always add the workaround BO as it includes a driver identifier for the
* error_state.
*/
result =
anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
if (result != VK_SUCCESS)
goto error;
for (uint32_t i = 0; i < wait_count; i++) {
result = anv_execbuf_add_sync(device, &execbuf,
waits[i].sync,
false /* is_signal */,
waits[i].wait_value);
if (result != VK_SUCCESS)
goto error;
}
for (uint32_t i = 0; i < signal_count; i++) {
result = anv_execbuf_add_sync(device, &execbuf,
signals[i].sync,
true /* is_signal */,
signals[i].signal_value);
if (result != VK_SUCCESS)
goto error;
}
if (queue->sync) {
result = anv_execbuf_add_sync(device, &execbuf,
queue->sync,
true /* is_signal */,
0 /* signal_value */);
if (result != VK_SUCCESS)
goto error;
}
if (cmd_buffer_count) {
result = setup_execbuf_for_cmd_buffers(&execbuf, queue,
cmd_buffers,
cmd_buffer_count);
} else {
result = setup_empty_execbuf(&execbuf, queue);
}
if (result != VK_SUCCESS)
goto error;
const bool has_perf_query =
perf_query_pool && perf_query_pass >= 0 && cmd_buffer_count;
if (INTEL_DEBUG(DEBUG_SUBMIT)) {
uint32_t total_size_kb = 0;
for (uint32_t i = 0; i < execbuf.bo_count; i++) {
const struct anv_bo *bo = execbuf.bos[i];
total_size_kb += bo->size / 1024;
}
fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0 (%.1fMb aperture)\n",
execbuf.execbuf.batch_start_offset, execbuf.execbuf.batch_len,
(float)total_size_kb / 1024.0f);
for (uint32_t i = 0; i < execbuf.bo_count; i++) {
const struct anv_bo *bo = execbuf.bos[i];
uint64_t size = bo->size + bo->_ccs_size;
fprintf(stderr, " BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64
"KB handle=%05u capture=%u name=%s\n",
bo->offset, bo->offset + size - 1, size / 1024, bo->gem_handle,
(bo->flags & EXEC_OBJECT_CAPTURE) != 0, bo->name);
}
}
anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
perf_query_pool, perf_query_pass);
return anv_i915_queue_exec_locked(queue, wait_count, waits,
cmd_buffer_count, cmd_buffers,
signal_count, signals,
perf_query_pool, perf_query_pass);
if (execbuf.syncobj_values) {
execbuf.timeline_fences.fence_count = execbuf.syncobj_count;
execbuf.timeline_fences.handles_ptr = (uintptr_t)execbuf.syncobjs;
execbuf.timeline_fences.values_ptr = (uintptr_t)execbuf.syncobj_values;
anv_execbuf_add_ext(&execbuf,
DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
&execbuf.timeline_fences.base);
} else if (execbuf.syncobjs) {
execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
execbuf.execbuf.num_cliprects = execbuf.syncobj_count;
execbuf.execbuf.cliprects_ptr = (uintptr_t)execbuf.syncobjs;
}
if (has_perf_query) {
assert(perf_query_pass < perf_query_pool->n_passes);
struct intel_perf_query_info *query_info =
perf_query_pool->pass_query[perf_query_pass];
/* Some performance queries use just the pipeline statistic HW, no need for
* OA in that case, so no need to reconfigure.
*/
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) &&
(query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
(void *)(uintptr_t) query_info->oa_metrics_set_id);
if (ret < 0) {
result = vk_device_set_lost(&device->vk,
"i915-perf config failed: %s",
strerror(errno));
}
}
struct anv_bo *pass_batch_bo = perf_query_pool->bo;
struct drm_i915_gem_exec_object2 query_pass_object = {
.handle = pass_batch_bo->gem_handle,
.offset = pass_batch_bo->offset,
.flags = pass_batch_bo->flags,
};
struct drm_i915_gem_execbuffer2 query_pass_execbuf = {
.buffers_ptr = (uintptr_t) &query_pass_object,
.buffer_count = 1,
.batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool,
perf_query_pass),
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags,
.rsvd1 = device->context_id,
};
int ret = queue->device->info->no_hw ? 0 :
anv_gem_execbuffer(queue->device, &query_pass_execbuf);
if (ret)
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
}
int ret = queue->device->info->no_hw ? 0 :
anv_gem_execbuffer(queue->device, &execbuf.execbuf);
if (ret)
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
if (result == VK_SUCCESS && queue->sync) {
result = vk_sync_wait(&device->vk, queue->sync, 0,
VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
if (result != VK_SUCCESS)
result = vk_queue_set_lost(&queue->vk, "sync wait failed");
}
error:
anv_execbuf_finish(&execbuf);
if (result == VK_SUCCESS && utrace_flush_data)
result = anv_queue_exec_utrace_locked(queue, utrace_flush_data);
return result;
}
static inline bool
@@ -2051,46 +1353,6 @@ anv_queue_submit(struct vk_queue *vk_queue,
return result;
}
static VkResult
anv_i915_execute_simple_batch(struct anv_queue *queue,
struct anv_bo *batch_bo,
uint32_t batch_bo_size)
{
struct anv_device *device = queue->device;
struct anv_execbuf execbuf = {
.alloc = &queue->device->vk.alloc,
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
};
VkResult result = anv_execbuf_add_bo(device, &execbuf, batch_bo, NULL, 0);
if (result != VK_SUCCESS)
return result;
execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf.objects,
.buffer_count = execbuf.bo_count,
.batch_start_offset = 0,
.batch_len = batch_bo_size,
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
.rsvd1 = device->context_id,
.rsvd2 = 0,
};
if (anv_gem_execbuffer(device, &execbuf.execbuf)) {
result = vk_device_set_lost(&device->vk, "anv_gem_execbuffer failed: %m");
goto fail;
}
result = anv_device_wait(device, batch_bo, INT64_MAX);
if (result != VK_SUCCESS)
result = vk_device_set_lost(&device->vk,
"anv_device_wait failed: %m");
fail:
anv_execbuf_finish(&execbuf);
return result;
}
VkResult
anv_queue_submit_simple_batch(struct anv_queue *queue,
struct anv_batch *batch)

src/intel/vulkan/anv_gem.c

@@ -216,16 +216,6 @@ anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns
return ret;
}
int
anv_gem_execbuffer(struct anv_device *device,
struct drm_i915_gem_execbuffer2 *execbuf)
{
if (execbuf->flags & I915_EXEC_FENCE_OUT)
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf);
else
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
}
/** Return -1 on error. */
int
anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle)

src/intel/vulkan/anv_gem_stubs.c

@@ -91,13 +91,6 @@ anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns
return 0;
}
int
anv_gem_execbuffer(struct anv_device *device,
struct drm_i915_gem_execbuffer2 *execbuf)
{
return 0;
}
int
anv_gem_set_tiling(struct anv_device *device,
uint32_t gem_handle, uint32_t stride, uint32_t tiling)

src/intel/vulkan/anv_private.h

@@ -1348,8 +1348,6 @@ uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
struct drm_i915_gem_memory_class_instance *regions);
uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
int anv_gem_execbuffer(struct anv_device *device,
struct drm_i915_gem_execbuffer2 *execbuf);
int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
uint32_t stride, uint32_t tiling);
int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);

src/intel/vulkan/i915/anv_batch_chain.c (new file)

@@ -0,0 +1,796 @@
/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "i915/anv_batch_chain.h"
#include "anv_private.h"
#include "anv_measure.h"
#include "perf/intel_perf.h"
#include "util/u_debug.h"
#include "drm-uapi/i915_drm.h"
struct anv_execbuf {
struct drm_i915_gem_execbuffer2 execbuf;
struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
struct drm_i915_gem_exec_object2 * objects;
uint32_t bo_count;
struct anv_bo ** bos;
/* Allocated length of the 'objects' and 'bos' arrays */
uint32_t array_length;
uint32_t syncobj_count;
uint32_t syncobj_array_length;
struct drm_i915_gem_exec_fence * syncobjs;
uint64_t * syncobj_values;
uint32_t cmd_buffer_count;
struct anv_query_pool *perf_query_pool;
const VkAllocationCallbacks * alloc;
VkSystemAllocationScope alloc_scope;
int perf_query_pass;
};
static void
anv_execbuf_finish(struct anv_execbuf *exec)
{
vk_free(exec->alloc, exec->syncobjs);
vk_free(exec->alloc, exec->syncobj_values);
vk_free(exec->alloc, exec->objects);
vk_free(exec->alloc, exec->bos);
}
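/* Append an i915_user_extension to the execbuf's extension chain. The chain
 * is rooted at cliprects_ptr, which the kernel reinterprets once
 * I915_EXEC_USE_EXTENSIONS is set.
 */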
static void
anv_execbuf_add_ext(struct anv_execbuf *exec,
uint32_t ext_name,
struct i915_user_extension *ext)
{
__u64 *iter = &exec->execbuf.cliprects_ptr;
exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
while (*iter != 0) {
iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;
}
ext->name = ext_name;
*iter = (uintptr_t) ext;
}
static VkResult
anv_execbuf_add_bo_bitset(struct anv_device *device,
struct anv_execbuf *exec,
uint32_t dep_words,
BITSET_WORD *deps,
uint32_t extra_flags);
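/* Add a BO to the execbuf validation list, growing the 'objects'/'bos'
 * arrays as needed. Re-adding a BO already in the list is a no-op thanks to
 * the exec_obj_index check; if 'relocs' is given, its dependency bitset is
 * added as well.
 */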
static VkResult
anv_execbuf_add_bo(struct anv_device *device,
struct anv_execbuf *exec,
struct anv_bo *bo,
struct anv_reloc_list *relocs,
uint32_t extra_flags)
{
struct drm_i915_gem_exec_object2 *obj = NULL;
if (bo->exec_obj_index < exec->bo_count &&
exec->bos[bo->exec_obj_index] == bo)
obj = &exec->objects[bo->exec_obj_index];
if (obj == NULL) {
/* We've never seen this one before. Add it to the list and assign
* an id that we can use later.
*/
if (exec->bo_count >= exec->array_length) {
uint32_t new_len = exec->objects ? exec->array_length * 2 : 64;
struct drm_i915_gem_exec_object2 *new_objects =
vk_alloc(exec->alloc, new_len * sizeof(*new_objects), 8, exec->alloc_scope);
if (new_objects == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
struct anv_bo **new_bos =
vk_alloc(exec->alloc, new_len * sizeof(*new_bos), 8, exec->alloc_scope);
if (new_bos == NULL) {
vk_free(exec->alloc, new_objects);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
if (exec->objects) {
memcpy(new_objects, exec->objects,
exec->bo_count * sizeof(*new_objects));
memcpy(new_bos, exec->bos,
exec->bo_count * sizeof(*new_bos));
}
vk_free(exec->alloc, exec->objects);
vk_free(exec->alloc, exec->bos);
exec->objects = new_objects;
exec->bos = new_bos;
exec->array_length = new_len;
}
assert(exec->bo_count < exec->array_length);
bo->exec_obj_index = exec->bo_count++;
obj = &exec->objects[bo->exec_obj_index];
exec->bos[bo->exec_obj_index] = bo;
obj->handle = bo->gem_handle;
obj->relocation_count = 0;
obj->relocs_ptr = 0;
obj->alignment = 0;
obj->offset = bo->offset;
obj->flags = bo->flags | extra_flags;
obj->rsvd1 = 0;
obj->rsvd2 = 0;
}
if (extra_flags & EXEC_OBJECT_WRITE) {
obj->flags |= EXEC_OBJECT_WRITE;
obj->flags &= ~EXEC_OBJECT_ASYNC;
}
if (relocs != NULL) {
return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words,
relocs->deps, extra_flags);
}
return VK_SUCCESS;
}
/* Add BO dependencies to execbuf */
static VkResult
anv_execbuf_add_bo_bitset(struct anv_device *device,
struct anv_execbuf *exec,
uint32_t dep_words,
BITSET_WORD *deps,
uint32_t extra_flags)
{
for (uint32_t w = 0; w < dep_words; w++) {
BITSET_WORD mask = deps[w];
while (mask) {
int i = u_bit_scan(&mask);
uint32_t gem_handle = w * BITSET_WORDBITS + i;
struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
assert(bo->refcount > 0);
VkResult result =
anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags);
if (result != VK_SUCCESS)
return result;
}
}
return VK_SUCCESS;
}
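/* Record a DRM syncobj as a wait or signal fence for this execbuf. Timeline
 * points are kept in a parallel 'syncobj_values' array that is allocated
 * lazily on the first non-zero timeline value.
 */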
static VkResult
anv_execbuf_add_syncobj(struct anv_device *device,
struct anv_execbuf *exec,
uint32_t syncobj,
uint32_t flags,
uint64_t timeline_value)
{
if (exec->syncobj_count >= exec->syncobj_array_length) {
uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16);
struct drm_i915_gem_exec_fence *new_syncobjs =
vk_alloc(exec->alloc, new_len * sizeof(*new_syncobjs),
8, exec->alloc_scope);
if (!new_syncobjs)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (exec->syncobjs)
typed_memcpy(new_syncobjs, exec->syncobjs, exec->syncobj_count);
exec->syncobjs = new_syncobjs;
if (exec->syncobj_values) {
uint64_t *new_syncobj_values =
vk_alloc(exec->alloc, new_len * sizeof(*new_syncobj_values),
8, exec->alloc_scope);
if (!new_syncobj_values)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
typed_memcpy(new_syncobj_values, exec->syncobj_values,
exec->syncobj_count);
exec->syncobj_values = new_syncobj_values;
}
exec->syncobj_array_length = new_len;
}
if (timeline_value && !exec->syncobj_values) {
exec->syncobj_values =
vk_zalloc(exec->alloc, exec->syncobj_array_length *
sizeof(*exec->syncobj_values),
8, exec->alloc_scope);
if (!exec->syncobj_values)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) {
.handle = syncobj,
.flags = flags,
};
if (timeline_value)
exec->syncobj_values[exec->syncobj_count] = timeline_value;
exec->syncobj_count++;
return VK_SUCCESS;
}
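/* Translate a vk_sync into execbuf terms: BO-backed syncs become entries in
 * the validation list (marked for write when signaling), DRM syncobjs become
 * fence array entries.
 */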
static VkResult
anv_execbuf_add_sync(struct anv_device *device,
struct anv_execbuf *execbuf,
struct vk_sync *sync,
bool is_signal,
uint64_t value)
{
/* It's illegal to signal a timeline with value 0 because that's never
* higher than the current value. A timeline wait on value 0 is always
* trivial because 0 <= uint64_t always.
*/
if ((sync->flags & VK_SYNC_IS_TIMELINE) && value == 0)
return VK_SUCCESS;
if (vk_sync_is_anv_bo_sync(sync)) {
struct anv_bo_sync *bo_sync =
container_of(sync, struct anv_bo_sync, sync);
assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET));
return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL,
is_signal ? EXEC_OBJECT_WRITE : 0);
} else if (vk_sync_type_is_drm_syncobj(sync->type)) {
struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);
if (!(sync->flags & VK_SYNC_IS_TIMELINE))
value = 0;
return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj,
is_signal ? I915_EXEC_FENCE_SIGNAL :
I915_EXEC_FENCE_WAIT,
value);
}
unreachable("Invalid sync type");
}
static VkResult
setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
struct anv_cmd_buffer *cmd_buffer)
{
VkResult result;
/* Add surface dependencies (BOs) to the execbuf */
anv_execbuf_add_bo_bitset(cmd_buffer->device, execbuf,
cmd_buffer->surface_relocs.dep_words,
cmd_buffer->surface_relocs.deps, 0);
/* First, we walk over all of the bos we've seen and add them and their
* relocations to the validate list.
*/
struct anv_batch_bo **bbo;
u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
(*bbo)->bo, &(*bbo)->relocs, 0);
if (result != VK_SUCCESS)
return result;
}
struct anv_bo **bo_entry;
u_vector_foreach(bo_entry, &cmd_buffer->dynamic_bos) {
result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
*bo_entry, NULL, 0);
if (result != VK_SUCCESS)
return result;
}
return VK_SUCCESS;
}
static VkResult
pin_state_pool(struct anv_device *device,
struct anv_execbuf *execbuf,
struct anv_state_pool *pool)
{
anv_block_pool_foreach_bo(bo, &pool->block_pool) {
VkResult result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0);
if (result != VK_SUCCESS)
return result;
}
return VK_SUCCESS;
}
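/* Build the validation list and drm_i915_gem_execbuffer2 for a set of
 * command buffers: chain them, add their BOs, the global state pools and all
 * user memory objects, then move the first batch BO to the end of the list
 * as the kernel requires.
 */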
static VkResult
setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
struct anv_queue *queue,
struct anv_cmd_buffer **cmd_buffers,
uint32_t num_cmd_buffers)
{
struct anv_device *device = queue->device;
VkResult result;
/* Edit the tail of the command buffers to chain them all together if they
* can be.
*/
anv_cmd_buffer_chain_command_buffers(cmd_buffers, num_cmd_buffers);
for (uint32_t i = 0; i < num_cmd_buffers; i++) {
anv_measure_submit(cmd_buffers[i]);
result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffers[i]);
if (result != VK_SUCCESS)
return result;
}
/* Add all the global BOs to the object list for softpin case. */
result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool);
if (result != VK_SUCCESS)
return result;
result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool);
if (result != VK_SUCCESS)
return result;
result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool);
if (result != VK_SUCCESS)
return result;
result = pin_state_pool(device, execbuf, &device->dynamic_state_pool);
if (result != VK_SUCCESS)
return result;
result = pin_state_pool(device, execbuf, &device->general_state_pool);
if (result != VK_SUCCESS)
return result;
result = pin_state_pool(device, execbuf, &device->instruction_state_pool);
if (result != VK_SUCCESS)
return result;
result = pin_state_pool(device, execbuf, &device->binding_table_pool);
if (result != VK_SUCCESS)
return result;
/* Add the BOs for all user-allocated memory objects because we can't
* track them after binding updates with VK_EXT_descriptor_indexing.
*/
list_for_each_entry(struct anv_device_memory, mem,
&device->memory_objects, link) {
result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0);
if (result != VK_SUCCESS)
return result;
}
for (uint32_t i = 0; i < execbuf->bo_count; i++)
execbuf->objects[i].offset = execbuf->bos[i]->offset;
struct anv_batch_bo *first_batch_bo =
list_first_entry(&cmd_buffers[0]->batch_bos, struct anv_batch_bo, link);
/* The kernel requires that the last entry in the validation list be the
* batch buffer to execute. We can simply swap the element
* corresponding to the first batch_bo in the chain with the last
* element in the list.
*/
if (first_batch_bo->bo->exec_obj_index != execbuf->bo_count - 1) {
uint32_t idx = first_batch_bo->bo->exec_obj_index;
uint32_t last_idx = execbuf->bo_count - 1;
struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
assert(execbuf->bos[idx] == first_batch_bo->bo);
execbuf->objects[idx] = execbuf->objects[last_idx];
execbuf->bos[idx] = execbuf->bos[last_idx];
execbuf->bos[idx]->exec_obj_index = idx;
execbuf->objects[last_idx] = tmp_obj;
execbuf->bos[last_idx] = first_batch_bo->bo;
first_batch_bo->bo->exec_obj_index = last_idx;
}
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (device->physical->memory.need_clflush) {
__builtin_ia32_mfence();
struct anv_batch_bo **bbo;
for (uint32_t i = 0; i < num_cmd_buffers; i++) {
u_vector_foreach(bbo, &cmd_buffers[i]->seen_bbos) {
for (uint32_t l = 0; l < (*bbo)->length; l += CACHELINE_SIZE)
__builtin_ia32_clflush((*bbo)->bo->map + l);
}
}
}
#endif
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf->objects,
.buffer_count = execbuf->bo_count,
.batch_start_offset = 0,
/* We'll fill in batch length later when chaining batches. */
.batch_len = 0,
.cliprects_ptr = 0,
.num_cliprects = 0,
.DR1 = 0,
.DR4 = 0,
.flags = I915_EXEC_NO_RELOC |
I915_EXEC_HANDLE_LUT |
queue->exec_flags,
.rsvd1 = device->context_id,
.rsvd2 = 0,
};
return VK_SUCCESS;
}
static VkResult
setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue)
{
struct anv_device *device = queue->device;
VkResult result = anv_execbuf_add_bo(device, execbuf,
device->trivial_batch_bo,
NULL, 0);
if (result != VK_SUCCESS)
return result;
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf->objects,
.buffer_count = execbuf->bo_count,
.batch_start_offset = 0,
.batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
.rsvd1 = device->context_id,
.rsvd2 = 0,
};
return VK_SUCCESS;
}
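/* Build an execbuf that submits only the utrace copy batch and signals
 * flush->sync through the fence array.
 */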
static VkResult
setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
struct anv_utrace_flush_copy *flush)
{
struct anv_device *device = queue->device;
VkResult result = anv_execbuf_add_bo(device, execbuf,
flush->batch_bo,
&flush->relocs, 0);
if (result != VK_SUCCESS)
return result;
result = anv_execbuf_add_sync(device, execbuf, flush->sync,
true /* is_signal */, 0 /* value */);
if (result != VK_SUCCESS)
return result;
if (flush->batch_bo->exec_obj_index != execbuf->bo_count - 1) {
uint32_t idx = flush->batch_bo->exec_obj_index;
uint32_t last_idx = execbuf->bo_count - 1;
struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
assert(execbuf->bos[idx] == flush->batch_bo);
execbuf->objects[idx] = execbuf->objects[last_idx];
execbuf->bos[idx] = execbuf->bos[last_idx];
execbuf->bos[idx]->exec_obj_index = idx;
execbuf->objects[last_idx] = tmp_obj;
execbuf->bos[last_idx] = flush->batch_bo;
flush->batch_bo->exec_obj_index = last_idx;
}
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (device->physical->memory.need_clflush)
intel_flush_range(flush->batch_bo->map, flush->batch_bo->size);
#endif
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf->objects,
.buffer_count = execbuf->bo_count,
.batch_start_offset = 0,
.batch_len = flush->batch.next - flush->batch.start,
.flags = I915_EXEC_NO_RELOC |
I915_EXEC_HANDLE_LUT |
I915_EXEC_FENCE_ARRAY |
queue->exec_flags,
.rsvd1 = device->context_id,
.rsvd2 = 0,
.num_cliprects = execbuf->syncobj_count,
.cliprects_ptr = (uintptr_t)execbuf->syncobjs,
};
return VK_SUCCESS;
}
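/* Thin wrapper around the EXECBUFFER2 ioctl; the _WR variant is required
 * when an output fence is requested so the kernel can write it back.
 */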
static int
anv_gem_execbuffer(struct anv_device *device,
struct drm_i915_gem_execbuffer2 *execbuf)
{
if (execbuf->flags & I915_EXEC_FENCE_OUT)
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf);
else
return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
}
static VkResult
anv_queue_exec_utrace_locked(struct anv_queue *queue,
struct anv_utrace_flush_copy *flush)
{
assert(flush->batch_bo);
struct anv_device *device = queue->device;
struct anv_execbuf execbuf = {
.alloc = &device->vk.alloc,
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
};
VkResult result = setup_utrace_execbuf(&execbuf, queue, flush);
if (result != VK_SUCCESS)
goto error;
int ret = queue->device->info->no_hw ? 0 :
anv_gem_execbuffer(queue->device, &execbuf.execbuf);
if (ret)
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
error:
anv_execbuf_finish(&execbuf);
return result;
}
VkResult
anv_i915_queue_exec_locked(struct anv_queue *queue,
uint32_t wait_count,
const struct vk_sync_wait *waits,
uint32_t cmd_buffer_count,
struct anv_cmd_buffer **cmd_buffers,
uint32_t signal_count,
const struct vk_sync_signal *signals,
struct anv_query_pool *perf_query_pool,
uint32_t perf_query_pass)
{
struct anv_device *device = queue->device;
struct anv_utrace_flush_copy *utrace_flush_data = NULL;
struct anv_execbuf execbuf = {
.alloc = &queue->device->vk.alloc,
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
.perf_query_pass = perf_query_pass,
};
/* Flush the trace points first; they need to be moved */
VkResult result =
anv_device_utrace_flush_cmd_buffers(queue,
cmd_buffer_count,
cmd_buffers,
&utrace_flush_data);
if (result != VK_SUCCESS)
goto error;
if (utrace_flush_data && !utrace_flush_data->batch_bo) {
result = anv_execbuf_add_sync(device, &execbuf,
utrace_flush_data->sync,
true /* is_signal */,
0);
if (result != VK_SUCCESS)
goto error;
utrace_flush_data = NULL;
}
/* Always add the workaround BO as it includes a driver identifier for the
* error_state.
*/
result =
anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
if (result != VK_SUCCESS)
goto error;
for (uint32_t i = 0; i < wait_count; i++) {
result = anv_execbuf_add_sync(device, &execbuf,
waits[i].sync,
false /* is_signal */,
waits[i].wait_value);
if (result != VK_SUCCESS)
goto error;
}
for (uint32_t i = 0; i < signal_count; i++) {
result = anv_execbuf_add_sync(device, &execbuf,
signals[i].sync,
true /* is_signal */,
signals[i].signal_value);
if (result != VK_SUCCESS)
goto error;
}
if (queue->sync) {
result = anv_execbuf_add_sync(device, &execbuf,
queue->sync,
true /* is_signal */,
0 /* signal_value */);
if (result != VK_SUCCESS)
goto error;
}
if (cmd_buffer_count) {
result = setup_execbuf_for_cmd_buffers(&execbuf, queue,
cmd_buffers,
cmd_buffer_count);
} else {
result = setup_empty_execbuf(&execbuf, queue);
}
if (result != VK_SUCCESS)
goto error;
const bool has_perf_query =
perf_query_pool && perf_query_pass >= 0 && cmd_buffer_count;
if (INTEL_DEBUG(DEBUG_SUBMIT)) {
uint32_t total_size_kb = 0;
for (uint32_t i = 0; i < execbuf.bo_count; i++) {
const struct anv_bo *bo = execbuf.bos[i];
total_size_kb += bo->size / 1024;
}
fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0 (%.1fMb aperture)\n",
execbuf.execbuf.batch_start_offset, execbuf.execbuf.batch_len,
(float)total_size_kb / 1024.0f);
for (uint32_t i = 0; i < execbuf.bo_count; i++) {
const struct anv_bo *bo = execbuf.bos[i];
uint64_t size = bo->size + bo->_ccs_size;
fprintf(stderr, " BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64
"KB handle=%05u capture=%u name=%s\n",
bo->offset, bo->offset + size - 1, size / 1024, bo->gem_handle,
(bo->flags & EXEC_OBJECT_CAPTURE) != 0, bo->name);
}
}
anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
perf_query_pool, perf_query_pass);
if (execbuf.syncobj_values) {
execbuf.timeline_fences.fence_count = execbuf.syncobj_count;
execbuf.timeline_fences.handles_ptr = (uintptr_t)execbuf.syncobjs;
execbuf.timeline_fences.values_ptr = (uintptr_t)execbuf.syncobj_values;
anv_execbuf_add_ext(&execbuf,
DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
&execbuf.timeline_fences.base);
} else if (execbuf.syncobjs) {
execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
execbuf.execbuf.num_cliprects = execbuf.syncobj_count;
execbuf.execbuf.cliprects_ptr = (uintptr_t)execbuf.syncobjs;
}
if (has_perf_query) {
assert(perf_query_pass < perf_query_pool->n_passes);
struct intel_perf_query_info *query_info =
perf_query_pool->pass_query[perf_query_pass];
/* Some performance queries use just the pipeline statistic HW, no need for
* OA in that case, so no need to reconfigure.
*/
if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) &&
(query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
(void *)(uintptr_t) query_info->oa_metrics_set_id);
if (ret < 0) {
result = vk_device_set_lost(&device->vk,
"i915-perf config failed: %s",
strerror(errno));
}
}
struct anv_bo *pass_batch_bo = perf_query_pool->bo;
struct drm_i915_gem_exec_object2 query_pass_object = {
.handle = pass_batch_bo->gem_handle,
.offset = pass_batch_bo->offset,
.flags = pass_batch_bo->flags,
};
struct drm_i915_gem_execbuffer2 query_pass_execbuf = {
.buffers_ptr = (uintptr_t) &query_pass_object,
.buffer_count = 1,
.batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool,
perf_query_pass),
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags,
.rsvd1 = device->context_id,
};
int ret = queue->device->info->no_hw ? 0 :
anv_gem_execbuffer(queue->device, &query_pass_execbuf);
if (ret)
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
}
int ret = queue->device->info->no_hw ? 0 :
anv_gem_execbuffer(queue->device, &execbuf.execbuf);
if (ret)
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
if (result == VK_SUCCESS && queue->sync) {
result = vk_sync_wait(&device->vk, queue->sync, 0,
VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
if (result != VK_SUCCESS)
result = vk_queue_set_lost(&queue->vk, "sync wait failed");
}
error:
anv_execbuf_finish(&execbuf);
if (result == VK_SUCCESS && utrace_flush_data)
result = anv_queue_exec_utrace_locked(queue, utrace_flush_data);
return result;
}
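/* Submit a single batch BO on its own and wait for it to complete. */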
VkResult
anv_i915_execute_simple_batch(struct anv_queue *queue,
struct anv_bo *batch_bo,
uint32_t batch_bo_size)
{
struct anv_device *device = queue->device;
struct anv_execbuf execbuf = {
.alloc = &queue->device->vk.alloc,
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
};
VkResult result = anv_execbuf_add_bo(device, &execbuf, batch_bo, NULL, 0);
if (result != VK_SUCCESS)
return result;
execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf.objects,
.buffer_count = execbuf.bo_count,
.batch_start_offset = 0,
.batch_len = batch_bo_size,
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
.rsvd1 = device->context_id,
.rsvd2 = 0,
};
if (anv_gem_execbuffer(device, &execbuf.execbuf)) {
result = vk_device_set_lost(&device->vk, "anv_gem_execbuffer failed: %m");
goto fail;
}
result = anv_device_wait(device, batch_bo, INT64_MAX);
if (result != VK_SUCCESS)
result = vk_device_set_lost(&device->vk,
"anv_device_wait failed: %m");
fail:
anv_execbuf_finish(&execbuf);
return result;
}

src/intel/vulkan/i915/anv_batch_chain.h (new file)

@@ -0,0 +1,49 @@
/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#include <stdint.h>
#include "vulkan/vulkan_core.h"
#include "vk_sync.h"
struct anv_queue;
struct anv_bo;
struct anv_cmd_buffer;
struct anv_query_pool;
VkResult anv_i915_execute_simple_batch(struct anv_queue *queue,
struct anv_bo *batch_bo,
uint32_t batch_bo_size);
VkResult
anv_i915_queue_exec_locked(struct anv_queue *queue,
uint32_t wait_count,
const struct vk_sync_wait *waits,
uint32_t cmd_buffer_count,
struct anv_cmd_buffer **cmd_buffers,
uint32_t signal_count,
const struct vk_sync_signal *signals,
struct anv_query_pool *perf_query_pool,
uint32_t perf_query_pass);

src/intel/vulkan/meson.build

@@ -132,6 +132,8 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']],
endforeach
libanv_files = files(
'i915/anv_batch_chain.c',
'i915/anv_batch_chain.h',
'i915/anv_device.c',
'i915/anv_device.h',
'layers/anv_doom64.c',