diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index df638368d9c..50ab07e2c80 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -34,11 +34,11 @@ #include "genxml/gen8_pack.h" #include "genxml/genX_bits.h" -#include "perf/intel_perf.h" -#include "util/u_debug.h" #include "util/perf/u_trace.h" +#include "i915/anv_batch_chain.h" + /** \file anv_batch_chain.c * * This file contains functions related to anv_cmd_buffer as a data @@ -1139,294 +1139,6 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, &secondary->surface_relocs); } -struct anv_execbuf { - struct drm_i915_gem_execbuffer2 execbuf; - - struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; - - struct drm_i915_gem_exec_object2 * objects; - uint32_t bo_count; - struct anv_bo ** bos; - - /* Allocated length of the 'objects' and 'bos' arrays */ - uint32_t array_length; - - uint32_t syncobj_count; - uint32_t syncobj_array_length; - struct drm_i915_gem_exec_fence * syncobjs; - uint64_t * syncobj_values; - - uint32_t cmd_buffer_count; - struct anv_query_pool *perf_query_pool; - - const VkAllocationCallbacks * alloc; - VkSystemAllocationScope alloc_scope; - - int perf_query_pass; -}; - -static void -anv_execbuf_finish(struct anv_execbuf *exec) -{ - vk_free(exec->alloc, exec->syncobjs); - vk_free(exec->alloc, exec->syncobj_values); - vk_free(exec->alloc, exec->objects); - vk_free(exec->alloc, exec->bos); -} - -static void -anv_execbuf_add_ext(struct anv_execbuf *exec, - uint32_t ext_name, - struct i915_user_extension *ext) -{ - __u64 *iter = &exec->execbuf.cliprects_ptr; - - exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS; - - while (*iter != 0) { - iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension; - } - - ext->name = ext_name; - - *iter = (uintptr_t) ext; -} - -static VkResult -anv_execbuf_add_bo_bitset(struct anv_device *device, - struct anv_execbuf *exec, - uint32_t dep_words, - BITSET_WORD *deps, - uint32_t extra_flags); - -static VkResult -anv_execbuf_add_bo(struct anv_device *device, - struct anv_execbuf *exec, - struct anv_bo *bo, - struct anv_reloc_list *relocs, - uint32_t extra_flags) -{ - struct drm_i915_gem_exec_object2 *obj = NULL; - - if (bo->exec_obj_index < exec->bo_count && - exec->bos[bo->exec_obj_index] == bo) - obj = &exec->objects[bo->exec_obj_index]; - - if (obj == NULL) { - /* We've never seen this one before. Add it to the list and assign - * an id that we can use later. - */ - if (exec->bo_count >= exec->array_length) { - uint32_t new_len = exec->objects ? 
exec->array_length * 2 : 64; - - struct drm_i915_gem_exec_object2 *new_objects = - vk_alloc(exec->alloc, new_len * sizeof(*new_objects), 8, exec->alloc_scope); - if (new_objects == NULL) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - struct anv_bo **new_bos = - vk_alloc(exec->alloc, new_len * sizeof(*new_bos), 8, exec->alloc_scope); - if (new_bos == NULL) { - vk_free(exec->alloc, new_objects); - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - if (exec->objects) { - memcpy(new_objects, exec->objects, - exec->bo_count * sizeof(*new_objects)); - memcpy(new_bos, exec->bos, - exec->bo_count * sizeof(*new_bos)); - } - - vk_free(exec->alloc, exec->objects); - vk_free(exec->alloc, exec->bos); - - exec->objects = new_objects; - exec->bos = new_bos; - exec->array_length = new_len; - } - - assert(exec->bo_count < exec->array_length); - - bo->exec_obj_index = exec->bo_count++; - obj = &exec->objects[bo->exec_obj_index]; - exec->bos[bo->exec_obj_index] = bo; - - obj->handle = bo->gem_handle; - obj->relocation_count = 0; - obj->relocs_ptr = 0; - obj->alignment = 0; - obj->offset = bo->offset; - obj->flags = bo->flags | extra_flags; - obj->rsvd1 = 0; - obj->rsvd2 = 0; - } - - if (extra_flags & EXEC_OBJECT_WRITE) { - obj->flags |= EXEC_OBJECT_WRITE; - obj->flags &= ~EXEC_OBJECT_ASYNC; - } - - if (relocs != NULL) { - return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words, - relocs->deps, extra_flags); - } - - return VK_SUCCESS; -} - -/* Add BO dependencies to execbuf */ -static VkResult -anv_execbuf_add_bo_bitset(struct anv_device *device, - struct anv_execbuf *exec, - uint32_t dep_words, - BITSET_WORD *deps, - uint32_t extra_flags) -{ - for (uint32_t w = 0; w < dep_words; w++) { - BITSET_WORD mask = deps[w]; - while (mask) { - int i = u_bit_scan(&mask); - uint32_t gem_handle = w * BITSET_WORDBITS + i; - struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle); - assert(bo->refcount > 0); - VkResult result = - anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags); - if (result != VK_SUCCESS) - return result; - } - } - - return VK_SUCCESS; -} - -static VkResult -anv_execbuf_add_syncobj(struct anv_device *device, - struct anv_execbuf *exec, - uint32_t syncobj, - uint32_t flags, - uint64_t timeline_value) -{ - if (exec->syncobj_count >= exec->syncobj_array_length) { - uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16); - - struct drm_i915_gem_exec_fence *new_syncobjs = - vk_alloc(exec->alloc, new_len * sizeof(*new_syncobjs), - 8, exec->alloc_scope); - if (!new_syncobjs) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - if (exec->syncobjs) - typed_memcpy(new_syncobjs, exec->syncobjs, exec->syncobj_count); - - exec->syncobjs = new_syncobjs; - - if (exec->syncobj_values) { - uint64_t *new_syncobj_values = - vk_alloc(exec->alloc, new_len * sizeof(*new_syncobj_values), - 8, exec->alloc_scope); - if (!new_syncobj_values) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - typed_memcpy(new_syncobj_values, exec->syncobj_values, - exec->syncobj_count); - - exec->syncobj_values = new_syncobj_values; - } - - exec->syncobj_array_length = new_len; - } - - if (timeline_value && !exec->syncobj_values) { - exec->syncobj_values = - vk_zalloc(exec->alloc, exec->syncobj_array_length * - sizeof(*exec->syncobj_values), - 8, exec->alloc_scope); - if (!exec->syncobj_values) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) { - .handle = syncobj, - .flags = flags, - }; - if 
(timeline_value) - exec->syncobj_values[exec->syncobj_count] = timeline_value; - - exec->syncobj_count++; - - return VK_SUCCESS; -} - -static VkResult -anv_execbuf_add_sync(struct anv_device *device, - struct anv_execbuf *execbuf, - struct vk_sync *sync, - bool is_signal, - uint64_t value) -{ - /* It's illegal to signal a timeline with value 0 because that's never - * higher than the current value. A timeline wait on value 0 is always - * trivial because 0 <= uint64_t always. - */ - if ((sync->flags & VK_SYNC_IS_TIMELINE) && value == 0) - return VK_SUCCESS; - - if (vk_sync_is_anv_bo_sync(sync)) { - struct anv_bo_sync *bo_sync = - container_of(sync, struct anv_bo_sync, sync); - - assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET)); - - return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL, - is_signal ? EXEC_OBJECT_WRITE : 0); - } else if (vk_sync_type_is_drm_syncobj(sync->type)) { - struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync); - - if (!(sync->flags & VK_SYNC_IS_TIMELINE)) - value = 0; - - return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj, - is_signal ? I915_EXEC_FENCE_SIGNAL : - I915_EXEC_FENCE_WAIT, - value); - } - - unreachable("Invalid sync type"); -} - -static VkResult -setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf, - struct anv_cmd_buffer *cmd_buffer) -{ - VkResult result; - /* Add surface dependencies (BOs) to the execbuf */ - anv_execbuf_add_bo_bitset(cmd_buffer->device, execbuf, - cmd_buffer->surface_relocs.dep_words, - cmd_buffer->surface_relocs.deps, 0); - - /* First, we walk over all of the bos we've seen and add them and their - * relocations to the validate list. - */ - struct anv_batch_bo **bbo; - u_vector_foreach(bbo, &cmd_buffer->seen_bbos) { - result = anv_execbuf_add_bo(cmd_buffer->device, execbuf, - (*bbo)->bo, &(*bbo)->relocs, 0); - if (result != VK_SUCCESS) - return result; - } - - struct anv_bo **bo_entry; - u_vector_foreach(bo_entry, &cmd_buffer->dynamic_bos) { - result = anv_execbuf_add_bo(cmd_buffer->device, execbuf, - *bo_entry, NULL, 0); - if (result != VK_SUCCESS) - return result; - } - - return VK_SUCCESS; -} - void anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers, uint32_t num_cmd_buffers) @@ -1444,245 +1156,6 @@ anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers, anv_cmd_buffer_record_end_submit(cmd_buffers[num_cmd_buffers - 1]); } -static VkResult -pin_state_pool(struct anv_device *device, - struct anv_execbuf *execbuf, - struct anv_state_pool *pool) -{ - anv_block_pool_foreach_bo(bo, &pool->block_pool) { - VkResult result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0); - if (result != VK_SUCCESS) - return result; - } - - return VK_SUCCESS; -} - -static VkResult -setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, - struct anv_queue *queue, - struct anv_cmd_buffer **cmd_buffers, - uint32_t num_cmd_buffers) -{ - struct anv_device *device = queue->device; - VkResult result; - - /* Edit the tail of the command buffers to chain them all together if they - * can be. - */ - anv_cmd_buffer_chain_command_buffers(cmd_buffers, num_cmd_buffers); - - for (uint32_t i = 0; i < num_cmd_buffers; i++) { - anv_measure_submit(cmd_buffers[i]); - result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffers[i]); - if (result != VK_SUCCESS) - return result; - } - - /* Add all the global BOs to the object list for softpin case. 
*/ - result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool); - if (result != VK_SUCCESS) - return result; - - result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool); - if (result != VK_SUCCESS) - return result; - - result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool); - if (result != VK_SUCCESS) - return result; - - result = pin_state_pool(device, execbuf, &device->dynamic_state_pool); - if (result != VK_SUCCESS) - return result; - - result = pin_state_pool(device, execbuf, &device->general_state_pool); - if (result != VK_SUCCESS) - return result; - - result = pin_state_pool(device, execbuf, &device->instruction_state_pool); - if (result != VK_SUCCESS) - return result; - - result = pin_state_pool(device, execbuf, &device->binding_table_pool); - if (result != VK_SUCCESS) - return result; - - /* Add the BOs for all user allocated memory objects because we can't - * track after binding updates of VK_EXT_descriptor_indexing. - */ - list_for_each_entry(struct anv_device_memory, mem, - &device->memory_objects, link) { - result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0); - if (result != VK_SUCCESS) - return result; - } - - for (uint32_t i = 0; i < execbuf->bo_count; i++) - execbuf->objects[i].offset = execbuf->bos[i]->offset; - - struct anv_batch_bo *first_batch_bo = - list_first_entry(&cmd_buffers[0]->batch_bos, struct anv_batch_bo, link); - - /* The kernel requires that the last entry in the validation list be the - * batch buffer to execute. We can simply swap the element - * corresponding to the first batch_bo in the chain with the last - * element in the list. - */ - if (first_batch_bo->bo->exec_obj_index != execbuf->bo_count - 1) { - uint32_t idx = first_batch_bo->bo->exec_obj_index; - uint32_t last_idx = execbuf->bo_count - 1; - - struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx]; - assert(execbuf->bos[idx] == first_batch_bo->bo); - - execbuf->objects[idx] = execbuf->objects[last_idx]; - execbuf->bos[idx] = execbuf->bos[last_idx]; - execbuf->bos[idx]->exec_obj_index = idx; - - execbuf->objects[last_idx] = tmp_obj; - execbuf->bos[last_idx] = first_batch_bo->bo; - first_batch_bo->bo->exec_obj_index = last_idx; - } - -#ifdef SUPPORT_INTEL_INTEGRATED_GPUS - if (device->physical->memory.need_clflush) { - __builtin_ia32_mfence(); - struct anv_batch_bo **bbo; - for (uint32_t i = 0; i < num_cmd_buffers; i++) { - u_vector_foreach(bbo, &cmd_buffers[i]->seen_bbos) { - for (uint32_t l = 0; l < (*bbo)->length; l += CACHELINE_SIZE) - __builtin_ia32_clflush((*bbo)->bo->map + l); - } - } - } -#endif - - execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { - .buffers_ptr = (uintptr_t) execbuf->objects, - .buffer_count = execbuf->bo_count, - .batch_start_offset = 0, - /* We'll fill in batch length later when chaining batches. 
*/ - .batch_len = 0, - .cliprects_ptr = 0, - .num_cliprects = 0, - .DR1 = 0, - .DR4 = 0, - .flags = I915_EXEC_NO_RELOC | - I915_EXEC_HANDLE_LUT | - queue->exec_flags, - .rsvd1 = device->context_id, - .rsvd2 = 0, - }; - - return VK_SUCCESS; -} - -static VkResult -setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue) -{ - struct anv_device *device = queue->device; - VkResult result = anv_execbuf_add_bo(device, execbuf, - device->trivial_batch_bo, - NULL, 0); - if (result != VK_SUCCESS) - return result; - - execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { - .buffers_ptr = (uintptr_t) execbuf->objects, - .buffer_count = execbuf->bo_count, - .batch_start_offset = 0, - .batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */ - .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC, - .rsvd1 = device->context_id, - .rsvd2 = 0, - }; - - return VK_SUCCESS; -} - -static VkResult -setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue, - struct anv_utrace_flush_copy *flush) -{ - struct anv_device *device = queue->device; - VkResult result = anv_execbuf_add_bo(device, execbuf, - flush->batch_bo, - &flush->relocs, 0); - if (result != VK_SUCCESS) - return result; - - result = anv_execbuf_add_sync(device, execbuf, flush->sync, - true /* is_signal */, 0 /* value */); - if (result != VK_SUCCESS) - return result; - - if (flush->batch_bo->exec_obj_index != execbuf->bo_count - 1) { - uint32_t idx = flush->batch_bo->exec_obj_index; - uint32_t last_idx = execbuf->bo_count - 1; - - struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx]; - assert(execbuf->bos[idx] == flush->batch_bo); - - execbuf->objects[idx] = execbuf->objects[last_idx]; - execbuf->bos[idx] = execbuf->bos[last_idx]; - execbuf->bos[idx]->exec_obj_index = idx; - - execbuf->objects[last_idx] = tmp_obj; - execbuf->bos[last_idx] = flush->batch_bo; - flush->batch_bo->exec_obj_index = last_idx; - } - -#ifdef SUPPORT_INTEL_INTEGRATED_GPUS - if (device->physical->memory.need_clflush) - intel_flush_range(flush->batch_bo->map, flush->batch_bo->size); -#endif - - execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { - .buffers_ptr = (uintptr_t) execbuf->objects, - .buffer_count = execbuf->bo_count, - .batch_start_offset = 0, - .batch_len = flush->batch.next - flush->batch.start, - .flags = I915_EXEC_NO_RELOC | - I915_EXEC_HANDLE_LUT | - I915_EXEC_FENCE_ARRAY | - queue->exec_flags, - .rsvd1 = device->context_id, - .rsvd2 = 0, - .num_cliprects = execbuf->syncobj_count, - .cliprects_ptr = (uintptr_t)execbuf->syncobjs, - }; - - return VK_SUCCESS; -} - -static VkResult -anv_queue_exec_utrace_locked(struct anv_queue *queue, - struct anv_utrace_flush_copy *flush) -{ - assert(flush->batch_bo); - - struct anv_device *device = queue->device; - struct anv_execbuf execbuf = { - .alloc = &device->vk.alloc, - .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, - }; - - VkResult result = setup_utrace_execbuf(&execbuf, queue, flush); - if (result != VK_SUCCESS) - goto error; - - int ret = queue->device->info->no_hw ? 
0 : - anv_gem_execbuffer(queue->device, &execbuf.execbuf); - if (ret) - result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m"); - - error: - anv_execbuf_finish(&execbuf); - - return result; -} - void anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue, uint32_t cmd_buffer_count, @@ -1752,181 +1225,10 @@ anv_queue_exec_locked(struct anv_queue *queue, struct anv_query_pool *perf_query_pool, uint32_t perf_query_pass) { - struct anv_device *device = queue->device; - struct anv_utrace_flush_copy *utrace_flush_data = NULL; - struct anv_execbuf execbuf = { - .alloc = &queue->device->vk.alloc, - .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, - .perf_query_pass = perf_query_pass, - }; - - /* Flush the trace points first, they need to be moved */ - VkResult result = - anv_device_utrace_flush_cmd_buffers(queue, - cmd_buffer_count, - cmd_buffers, - &utrace_flush_data); - if (result != VK_SUCCESS) - goto error; - - if (utrace_flush_data && !utrace_flush_data->batch_bo) { - result = anv_execbuf_add_sync(device, &execbuf, - utrace_flush_data->sync, - true /* is_signal */, - 0); - if (result != VK_SUCCESS) - goto error; - - utrace_flush_data = NULL; - } - - /* Always add the workaround BO as it includes a driver identifier for the - * error_state. - */ - result = - anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0); - if (result != VK_SUCCESS) - goto error; - - for (uint32_t i = 0; i < wait_count; i++) { - result = anv_execbuf_add_sync(device, &execbuf, - waits[i].sync, - false /* is_signal */, - waits[i].wait_value); - if (result != VK_SUCCESS) - goto error; - } - - for (uint32_t i = 0; i < signal_count; i++) { - result = anv_execbuf_add_sync(device, &execbuf, - signals[i].sync, - true /* is_signal */, - signals[i].signal_value); - if (result != VK_SUCCESS) - goto error; - } - - if (queue->sync) { - result = anv_execbuf_add_sync(device, &execbuf, - queue->sync, - true /* is_signal */, - 0 /* signal_value */); - if (result != VK_SUCCESS) - goto error; - } - - if (cmd_buffer_count) { - result = setup_execbuf_for_cmd_buffers(&execbuf, queue, - cmd_buffers, - cmd_buffer_count); - } else { - result = setup_empty_execbuf(&execbuf, queue); - } - - if (result != VK_SUCCESS) - goto error; - - const bool has_perf_query = - perf_query_pool && perf_query_pass >= 0 && cmd_buffer_count; - - if (INTEL_DEBUG(DEBUG_SUBMIT)) { - uint32_t total_size_kb = 0; - for (uint32_t i = 0; i < execbuf.bo_count; i++) { - const struct anv_bo *bo = execbuf.bos[i]; - total_size_kb += bo->size / 1024; - } - - fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0 (%.1fMb aperture)\n", - execbuf.execbuf.batch_start_offset, execbuf.execbuf.batch_len, - (float)total_size_kb / 1024.0f); - for (uint32_t i = 0; i < execbuf.bo_count; i++) { - const struct anv_bo *bo = execbuf.bos[i]; - uint64_t size = bo->size + bo->_ccs_size; - - fprintf(stderr, " BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64 - "KB handle=%05u capture=%u name=%s\n", - bo->offset, bo->offset + size - 1, size / 1024, bo->gem_handle, - (bo->flags & EXEC_OBJECT_CAPTURE) != 0, bo->name); - } - } - - anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers, - perf_query_pool, perf_query_pass); - - if (execbuf.syncobj_values) { - execbuf.timeline_fences.fence_count = execbuf.syncobj_count; - execbuf.timeline_fences.handles_ptr = (uintptr_t)execbuf.syncobjs; - execbuf.timeline_fences.values_ptr = (uintptr_t)execbuf.syncobj_values; - anv_execbuf_add_ext(&execbuf, - DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, - 
&execbuf.timeline_fences.base); - } else if (execbuf.syncobjs) { - execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY; - execbuf.execbuf.num_cliprects = execbuf.syncobj_count; - execbuf.execbuf.cliprects_ptr = (uintptr_t)execbuf.syncobjs; - } - - if (has_perf_query) { - assert(perf_query_pass < perf_query_pool->n_passes); - struct intel_perf_query_info *query_info = - perf_query_pool->pass_query[perf_query_pass]; - - /* Some performance queries just the pipeline statistic HW, no need for - * OA in that case, so no need to reconfigure. - */ - if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) && - (query_info->kind == INTEL_PERF_QUERY_TYPE_OA || - query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) { - int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG, - (void *)(uintptr_t) query_info->oa_metrics_set_id); - if (ret < 0) { - result = vk_device_set_lost(&device->vk, - "i915-perf config failed: %s", - strerror(errno)); - } - } - - struct anv_bo *pass_batch_bo = perf_query_pool->bo; - - struct drm_i915_gem_exec_object2 query_pass_object = { - .handle = pass_batch_bo->gem_handle, - .offset = pass_batch_bo->offset, - .flags = pass_batch_bo->flags, - }; - struct drm_i915_gem_execbuffer2 query_pass_execbuf = { - .buffers_ptr = (uintptr_t) &query_pass_object, - .buffer_count = 1, - .batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool, - perf_query_pass), - .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags, - .rsvd1 = device->context_id, - }; - - int ret = queue->device->info->no_hw ? 0 : - anv_gem_execbuffer(queue->device, &query_pass_execbuf); - if (ret) - result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m"); - } - - int ret = queue->device->info->no_hw ? 0 : - anv_gem_execbuffer(queue->device, &execbuf.execbuf); - if (ret) - result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m"); - - if (result == VK_SUCCESS && queue->sync) { - result = vk_sync_wait(&device->vk, queue->sync, 0, - VK_SYNC_WAIT_COMPLETE, UINT64_MAX); - if (result != VK_SUCCESS) - result = vk_queue_set_lost(&queue->vk, "sync wait failed"); - } - - error: - anv_execbuf_finish(&execbuf); - - if (result == VK_SUCCESS && utrace_flush_data) - result = anv_queue_exec_utrace_locked(queue, utrace_flush_data); - - return result; + return anv_i915_queue_exec_locked(queue, wait_count, waits, + cmd_buffer_count, cmd_buffers, + signal_count, signals, + perf_query_pool, perf_query_pass); } static inline bool @@ -2051,46 +1353,6 @@ anv_queue_submit(struct vk_queue *vk_queue, return result; } -static VkResult -anv_i915_execute_simple_batch(struct anv_queue *queue, - struct anv_bo *batch_bo, - uint32_t batch_bo_size) -{ - struct anv_device *device = queue->device; - struct anv_execbuf execbuf = { - .alloc = &queue->device->vk.alloc, - .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, - }; - - VkResult result = anv_execbuf_add_bo(device, &execbuf, batch_bo, NULL, 0); - if (result != VK_SUCCESS) - return result; - - execbuf.execbuf = (struct drm_i915_gem_execbuffer2) { - .buffers_ptr = (uintptr_t) execbuf.objects, - .buffer_count = execbuf.bo_count, - .batch_start_offset = 0, - .batch_len = batch_bo_size, - .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC, - .rsvd1 = device->context_id, - .rsvd2 = 0, - }; - - if (anv_gem_execbuffer(device, &execbuf.execbuf)) { - result = vk_device_set_lost(&device->vk, "anv_gem_execbuffer failed: %m"); - goto fail; - } - - result = anv_device_wait(device, batch_bo, INT64_MAX); - if (result != VK_SUCCESS) - result = vk_device_set_lost(&device->vk, - "anv_device_wait failed: 
%m"); - -fail: - anv_execbuf_finish(&execbuf); - return result; -} - VkResult anv_queue_submit_simple_batch(struct anv_queue *queue, struct anv_batch *batch) diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c index 3321528e2fe..2f8a58cf205 100644 --- a/src/intel/vulkan/anv_gem.c +++ b/src/intel/vulkan/anv_gem.c @@ -216,16 +216,6 @@ anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns return ret; } -int -anv_gem_execbuffer(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf) -{ - if (execbuf->flags & I915_EXEC_FENCE_OUT) - return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf); - else - return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); -} - /** Return -1 on error. */ int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle) diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c index b46cf0f58dc..50e0105eb73 100644 --- a/src/intel/vulkan/anv_gem_stubs.c +++ b/src/intel/vulkan/anv_gem_stubs.c @@ -91,13 +91,6 @@ anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns return 0; } -int -anv_gem_execbuffer(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf) -{ - return 0; -} - int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, uint32_t stride, uint32_t tiling) diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 079ec4896b9..d466904ca8d 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1348,8 +1348,6 @@ uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size, struct drm_i915_gem_memory_class_instance *regions); uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size); int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns); -int anv_gem_execbuffer(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf); int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, uint32_t stride, uint32_t tiling); int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle); diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c new file mode 100644 index 00000000000..56754725165 --- /dev/null +++ b/src/intel/vulkan/i915/anv_batch_chain.c @@ -0,0 +1,796 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "i915/anv_batch_chain.h" +#include "anv_private.h" +#include "anv_measure.h" + +#include "perf/intel_perf.h" +#include "util/u_debug.h" + +#include "drm-uapi/i915_drm.h" + +struct anv_execbuf { + struct drm_i915_gem_execbuffer2 execbuf; + + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; + + struct drm_i915_gem_exec_object2 * objects; + uint32_t bo_count; + struct anv_bo ** bos; + + /* Allocated length of the 'objects' and 'bos' arrays */ + uint32_t array_length; + + uint32_t syncobj_count; + uint32_t syncobj_array_length; + struct drm_i915_gem_exec_fence * syncobjs; + uint64_t * syncobj_values; + + uint32_t cmd_buffer_count; + struct anv_query_pool *perf_query_pool; + + const VkAllocationCallbacks * alloc; + VkSystemAllocationScope alloc_scope; + + int perf_query_pass; +}; + +static void +anv_execbuf_finish(struct anv_execbuf *exec) +{ + vk_free(exec->alloc, exec->syncobjs); + vk_free(exec->alloc, exec->syncobj_values); + vk_free(exec->alloc, exec->objects); + vk_free(exec->alloc, exec->bos); +} + +static void +anv_execbuf_add_ext(struct anv_execbuf *exec, + uint32_t ext_name, + struct i915_user_extension *ext) +{ + __u64 *iter = &exec->execbuf.cliprects_ptr; + + exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS; + + while (*iter != 0) { + iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension; + } + + ext->name = ext_name; + + *iter = (uintptr_t) ext; +} + +static VkResult +anv_execbuf_add_bo_bitset(struct anv_device *device, + struct anv_execbuf *exec, + uint32_t dep_words, + BITSET_WORD *deps, + uint32_t extra_flags); + +static VkResult +anv_execbuf_add_bo(struct anv_device *device, + struct anv_execbuf *exec, + struct anv_bo *bo, + struct anv_reloc_list *relocs, + uint32_t extra_flags) +{ + struct drm_i915_gem_exec_object2 *obj = NULL; + + if (bo->exec_obj_index < exec->bo_count && + exec->bos[bo->exec_obj_index] == bo) + obj = &exec->objects[bo->exec_obj_index]; + + if (obj == NULL) { + /* We've never seen this one before. Add it to the list and assign + * an id that we can use later. + */ + if (exec->bo_count >= exec->array_length) { + uint32_t new_len = exec->objects ? 
exec->array_length * 2 : 64; + + struct drm_i915_gem_exec_object2 *new_objects = + vk_alloc(exec->alloc, new_len * sizeof(*new_objects), 8, exec->alloc_scope); + if (new_objects == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_bos = + vk_alloc(exec->alloc, new_len * sizeof(*new_bos), 8, exec->alloc_scope); + if (new_bos == NULL) { + vk_free(exec->alloc, new_objects); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (exec->objects) { + memcpy(new_objects, exec->objects, + exec->bo_count * sizeof(*new_objects)); + memcpy(new_bos, exec->bos, + exec->bo_count * sizeof(*new_bos)); + } + + vk_free(exec->alloc, exec->objects); + vk_free(exec->alloc, exec->bos); + + exec->objects = new_objects; + exec->bos = new_bos; + exec->array_length = new_len; + } + + assert(exec->bo_count < exec->array_length); + + bo->exec_obj_index = exec->bo_count++; + obj = &exec->objects[bo->exec_obj_index]; + exec->bos[bo->exec_obj_index] = bo; + + obj->handle = bo->gem_handle; + obj->relocation_count = 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = bo->flags | extra_flags; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + } + + if (extra_flags & EXEC_OBJECT_WRITE) { + obj->flags |= EXEC_OBJECT_WRITE; + obj->flags &= ~EXEC_OBJECT_ASYNC; + } + + if (relocs != NULL) { + return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words, + relocs->deps, extra_flags); + } + + return VK_SUCCESS; +} + +/* Add BO dependencies to execbuf */ +static VkResult +anv_execbuf_add_bo_bitset(struct anv_device *device, + struct anv_execbuf *exec, + uint32_t dep_words, + BITSET_WORD *deps, + uint32_t extra_flags) +{ + for (uint32_t w = 0; w < dep_words; w++) { + BITSET_WORD mask = deps[w]; + while (mask) { + int i = u_bit_scan(&mask); + uint32_t gem_handle = w * BITSET_WORDBITS + i; + struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle); + assert(bo->refcount > 0); + VkResult result = + anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags); + if (result != VK_SUCCESS) + return result; + } + } + + return VK_SUCCESS; +} + +static VkResult +anv_execbuf_add_syncobj(struct anv_device *device, + struct anv_execbuf *exec, + uint32_t syncobj, + uint32_t flags, + uint64_t timeline_value) +{ + if (exec->syncobj_count >= exec->syncobj_array_length) { + uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16); + + struct drm_i915_gem_exec_fence *new_syncobjs = + vk_alloc(exec->alloc, new_len * sizeof(*new_syncobjs), + 8, exec->alloc_scope); + if (!new_syncobjs) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + if (exec->syncobjs) + typed_memcpy(new_syncobjs, exec->syncobjs, exec->syncobj_count); + + exec->syncobjs = new_syncobjs; + + if (exec->syncobj_values) { + uint64_t *new_syncobj_values = + vk_alloc(exec->alloc, new_len * sizeof(*new_syncobj_values), + 8, exec->alloc_scope); + if (!new_syncobj_values) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + typed_memcpy(new_syncobj_values, exec->syncobj_values, + exec->syncobj_count); + + exec->syncobj_values = new_syncobj_values; + } + + exec->syncobj_array_length = new_len; + } + + if (timeline_value && !exec->syncobj_values) { + exec->syncobj_values = + vk_zalloc(exec->alloc, exec->syncobj_array_length * + sizeof(*exec->syncobj_values), + 8, exec->alloc_scope); + if (!exec->syncobj_values) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) { + .handle = syncobj, + .flags = flags, + }; + if 
(timeline_value) + exec->syncobj_values[exec->syncobj_count] = timeline_value; + + exec->syncobj_count++; + + return VK_SUCCESS; +} + +static VkResult +anv_execbuf_add_sync(struct anv_device *device, + struct anv_execbuf *execbuf, + struct vk_sync *sync, + bool is_signal, + uint64_t value) +{ + /* It's illegal to signal a timeline with value 0 because that's never + * higher than the current value. A timeline wait on value 0 is always + * trivial because 0 <= uint64_t always. + */ + if ((sync->flags & VK_SYNC_IS_TIMELINE) && value == 0) + return VK_SUCCESS; + + if (vk_sync_is_anv_bo_sync(sync)) { + struct anv_bo_sync *bo_sync = + container_of(sync, struct anv_bo_sync, sync); + + assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET)); + + return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL, + is_signal ? EXEC_OBJECT_WRITE : 0); + } else if (vk_sync_type_is_drm_syncobj(sync->type)) { + struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync); + + if (!(sync->flags & VK_SYNC_IS_TIMELINE)) + value = 0; + + return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj, + is_signal ? I915_EXEC_FENCE_SIGNAL : + I915_EXEC_FENCE_WAIT, + value); + } + + unreachable("Invalid sync type"); +} + +static VkResult +setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf, + struct anv_cmd_buffer *cmd_buffer) +{ + VkResult result; + /* Add surface dependencies (BOs) to the execbuf */ + anv_execbuf_add_bo_bitset(cmd_buffer->device, execbuf, + cmd_buffer->surface_relocs.dep_words, + cmd_buffer->surface_relocs.deps, 0); + + /* First, we walk over all of the bos we've seen and add them and their + * relocations to the validate list. + */ + struct anv_batch_bo **bbo; + u_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + result = anv_execbuf_add_bo(cmd_buffer->device, execbuf, + (*bbo)->bo, &(*bbo)->relocs, 0); + if (result != VK_SUCCESS) + return result; + } + + struct anv_bo **bo_entry; + u_vector_foreach(bo_entry, &cmd_buffer->dynamic_bos) { + result = anv_execbuf_add_bo(cmd_buffer->device, execbuf, + *bo_entry, NULL, 0); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +static VkResult +pin_state_pool(struct anv_device *device, + struct anv_execbuf *execbuf, + struct anv_state_pool *pool) +{ + anv_block_pool_foreach_bo(bo, &pool->block_pool) { + VkResult result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +static VkResult +setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, + struct anv_queue *queue, + struct anv_cmd_buffer **cmd_buffers, + uint32_t num_cmd_buffers) +{ + struct anv_device *device = queue->device; + VkResult result; + + /* Edit the tail of the command buffers to chain them all together if they + * can be. + */ + anv_cmd_buffer_chain_command_buffers(cmd_buffers, num_cmd_buffers); + + for (uint32_t i = 0; i < num_cmd_buffers; i++) { + anv_measure_submit(cmd_buffers[i]); + result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffers[i]); + if (result != VK_SUCCESS) + return result; + } + + /* Add all the global BOs to the object list for softpin case. 
*/ + result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->dynamic_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->general_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->instruction_state_pool); + if (result != VK_SUCCESS) + return result; + + result = pin_state_pool(device, execbuf, &device->binding_table_pool); + if (result != VK_SUCCESS) + return result; + + /* Add the BOs for all user allocated memory objects because we can't + * track after binding updates of VK_EXT_descriptor_indexing. + */ + list_for_each_entry(struct anv_device_memory, mem, + &device->memory_objects, link) { + result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0); + if (result != VK_SUCCESS) + return result; + } + + for (uint32_t i = 0; i < execbuf->bo_count; i++) + execbuf->objects[i].offset = execbuf->bos[i]->offset; + + struct anv_batch_bo *first_batch_bo = + list_first_entry(&cmd_buffers[0]->batch_bos, struct anv_batch_bo, link); + + /* The kernel requires that the last entry in the validation list be the + * batch buffer to execute. We can simply swap the element + * corresponding to the first batch_bo in the chain with the last + * element in the list. + */ + if (first_batch_bo->bo->exec_obj_index != execbuf->bo_count - 1) { + uint32_t idx = first_batch_bo->bo->exec_obj_index; + uint32_t last_idx = execbuf->bo_count - 1; + + struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx]; + assert(execbuf->bos[idx] == first_batch_bo->bo); + + execbuf->objects[idx] = execbuf->objects[last_idx]; + execbuf->bos[idx] = execbuf->bos[last_idx]; + execbuf->bos[idx]->exec_obj_index = idx; + + execbuf->objects[last_idx] = tmp_obj; + execbuf->bos[last_idx] = first_batch_bo->bo; + first_batch_bo->bo->exec_obj_index = last_idx; + } + +#ifdef SUPPORT_INTEL_INTEGRATED_GPUS + if (device->physical->memory.need_clflush) { + __builtin_ia32_mfence(); + struct anv_batch_bo **bbo; + for (uint32_t i = 0; i < num_cmd_buffers; i++) { + u_vector_foreach(bbo, &cmd_buffers[i]->seen_bbos) { + for (uint32_t l = 0; l < (*bbo)->length; l += CACHELINE_SIZE) + __builtin_ia32_clflush((*bbo)->bo->map + l); + } + } + } +#endif + + execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) execbuf->objects, + .buffer_count = execbuf->bo_count, + .batch_start_offset = 0, + /* We'll fill in batch length later when chaining batches. 
*/ + .batch_len = 0, + .cliprects_ptr = 0, + .num_cliprects = 0, + .DR1 = 0, + .DR4 = 0, + .flags = I915_EXEC_NO_RELOC | + I915_EXEC_HANDLE_LUT | + queue->exec_flags, + .rsvd1 = device->context_id, + .rsvd2 = 0, + }; + + return VK_SUCCESS; +} + +static VkResult +setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue) +{ + struct anv_device *device = queue->device; + VkResult result = anv_execbuf_add_bo(device, execbuf, + device->trivial_batch_bo, + NULL, 0); + if (result != VK_SUCCESS) + return result; + + execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) execbuf->objects, + .buffer_count = execbuf->bo_count, + .batch_start_offset = 0, + .batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */ + .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC, + .rsvd1 = device->context_id, + .rsvd2 = 0, + }; + + return VK_SUCCESS; +} + +static VkResult +setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue, + struct anv_utrace_flush_copy *flush) +{ + struct anv_device *device = queue->device; + VkResult result = anv_execbuf_add_bo(device, execbuf, + flush->batch_bo, + &flush->relocs, 0); + if (result != VK_SUCCESS) + return result; + + result = anv_execbuf_add_sync(device, execbuf, flush->sync, + true /* is_signal */, 0 /* value */); + if (result != VK_SUCCESS) + return result; + + if (flush->batch_bo->exec_obj_index != execbuf->bo_count - 1) { + uint32_t idx = flush->batch_bo->exec_obj_index; + uint32_t last_idx = execbuf->bo_count - 1; + + struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx]; + assert(execbuf->bos[idx] == flush->batch_bo); + + execbuf->objects[idx] = execbuf->objects[last_idx]; + execbuf->bos[idx] = execbuf->bos[last_idx]; + execbuf->bos[idx]->exec_obj_index = idx; + + execbuf->objects[last_idx] = tmp_obj; + execbuf->bos[last_idx] = flush->batch_bo; + flush->batch_bo->exec_obj_index = last_idx; + } + +#ifdef SUPPORT_INTEL_INTEGRATED_GPUS + if (device->physical->memory.need_clflush) + intel_flush_range(flush->batch_bo->map, flush->batch_bo->size); +#endif + + execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) execbuf->objects, + .buffer_count = execbuf->bo_count, + .batch_start_offset = 0, + .batch_len = flush->batch.next - flush->batch.start, + .flags = I915_EXEC_NO_RELOC | + I915_EXEC_HANDLE_LUT | + I915_EXEC_FENCE_ARRAY | + queue->exec_flags, + .rsvd1 = device->context_id, + .rsvd2 = 0, + .num_cliprects = execbuf->syncobj_count, + .cliprects_ptr = (uintptr_t)execbuf->syncobjs, + }; + + return VK_SUCCESS; +} + +static int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + if (execbuf->flags & I915_EXEC_FENCE_OUT) + return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf); + else + return intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); +} + +static VkResult +anv_queue_exec_utrace_locked(struct anv_queue *queue, + struct anv_utrace_flush_copy *flush) +{ + assert(flush->batch_bo); + + struct anv_device *device = queue->device; + struct anv_execbuf execbuf = { + .alloc = &device->vk.alloc, + .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, + }; + + VkResult result = setup_utrace_execbuf(&execbuf, queue, flush); + if (result != VK_SUCCESS) + goto error; + + int ret = queue->device->info->no_hw ? 
0 : + anv_gem_execbuffer(queue->device, &execbuf.execbuf); + if (ret) + result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m"); + + error: + anv_execbuf_finish(&execbuf); + + return result; +} + +VkResult +anv_i915_queue_exec_locked(struct anv_queue *queue, + uint32_t wait_count, + const struct vk_sync_wait *waits, + uint32_t cmd_buffer_count, + struct anv_cmd_buffer **cmd_buffers, + uint32_t signal_count, + const struct vk_sync_signal *signals, + struct anv_query_pool *perf_query_pool, + uint32_t perf_query_pass) +{ + struct anv_device *device = queue->device; + struct anv_utrace_flush_copy *utrace_flush_data = NULL; + struct anv_execbuf execbuf = { + .alloc = &queue->device->vk.alloc, + .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, + .perf_query_pass = perf_query_pass, + }; + + /* Flush the trace points first, they need to be moved */ + VkResult result = + anv_device_utrace_flush_cmd_buffers(queue, + cmd_buffer_count, + cmd_buffers, + &utrace_flush_data); + if (result != VK_SUCCESS) + goto error; + + if (utrace_flush_data && !utrace_flush_data->batch_bo) { + result = anv_execbuf_add_sync(device, &execbuf, + utrace_flush_data->sync, + true /* is_signal */, + 0); + if (result != VK_SUCCESS) + goto error; + + utrace_flush_data = NULL; + } + + /* Always add the workaround BO as it includes a driver identifier for the + * error_state. + */ + result = + anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0); + if (result != VK_SUCCESS) + goto error; + + for (uint32_t i = 0; i < wait_count; i++) { + result = anv_execbuf_add_sync(device, &execbuf, + waits[i].sync, + false /* is_signal */, + waits[i].wait_value); + if (result != VK_SUCCESS) + goto error; + } + + for (uint32_t i = 0; i < signal_count; i++) { + result = anv_execbuf_add_sync(device, &execbuf, + signals[i].sync, + true /* is_signal */, + signals[i].signal_value); + if (result != VK_SUCCESS) + goto error; + } + + if (queue->sync) { + result = anv_execbuf_add_sync(device, &execbuf, + queue->sync, + true /* is_signal */, + 0 /* signal_value */); + if (result != VK_SUCCESS) + goto error; + } + + if (cmd_buffer_count) { + result = setup_execbuf_for_cmd_buffers(&execbuf, queue, + cmd_buffers, + cmd_buffer_count); + } else { + result = setup_empty_execbuf(&execbuf, queue); + } + + if (result != VK_SUCCESS) + goto error; + + const bool has_perf_query = + perf_query_pool && perf_query_pass >= 0 && cmd_buffer_count; + + if (INTEL_DEBUG(DEBUG_SUBMIT)) { + uint32_t total_size_kb = 0; + for (uint32_t i = 0; i < execbuf.bo_count; i++) { + const struct anv_bo *bo = execbuf.bos[i]; + total_size_kb += bo->size / 1024; + } + + fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0 (%.1fMb aperture)\n", + execbuf.execbuf.batch_start_offset, execbuf.execbuf.batch_len, + (float)total_size_kb / 1024.0f); + for (uint32_t i = 0; i < execbuf.bo_count; i++) { + const struct anv_bo *bo = execbuf.bos[i]; + uint64_t size = bo->size + bo->_ccs_size; + + fprintf(stderr, " BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64 + "KB handle=%05u capture=%u name=%s\n", + bo->offset, bo->offset + size - 1, size / 1024, bo->gem_handle, + (bo->flags & EXEC_OBJECT_CAPTURE) != 0, bo->name); + } + } + + anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers, + perf_query_pool, perf_query_pass); + + if (execbuf.syncobj_values) { + execbuf.timeline_fences.fence_count = execbuf.syncobj_count; + execbuf.timeline_fences.handles_ptr = (uintptr_t)execbuf.syncobjs; + execbuf.timeline_fences.values_ptr = (uintptr_t)execbuf.syncobj_values; + 
anv_execbuf_add_ext(&execbuf, + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, + &execbuf.timeline_fences.base); + } else if (execbuf.syncobjs) { + execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY; + execbuf.execbuf.num_cliprects = execbuf.syncobj_count; + execbuf.execbuf.cliprects_ptr = (uintptr_t)execbuf.syncobjs; + } + + if (has_perf_query) { + assert(perf_query_pass < perf_query_pool->n_passes); + struct intel_perf_query_info *query_info = + perf_query_pool->pass_query[perf_query_pass]; + + /* Some performance queries just use the pipeline statistics HW, no need for + * OA in that case, so no need to reconfigure. + */ + if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) && + (query_info->kind == INTEL_PERF_QUERY_TYPE_OA || + query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) { + int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG, + (void *)(uintptr_t) query_info->oa_metrics_set_id); + if (ret < 0) { + result = vk_device_set_lost(&device->vk, + "i915-perf config failed: %s", + strerror(errno)); + } + } + + struct anv_bo *pass_batch_bo = perf_query_pool->bo; + + struct drm_i915_gem_exec_object2 query_pass_object = { + .handle = pass_batch_bo->gem_handle, + .offset = pass_batch_bo->offset, + .flags = pass_batch_bo->flags, + }; + struct drm_i915_gem_execbuffer2 query_pass_execbuf = { + .buffers_ptr = (uintptr_t) &query_pass_object, + .buffer_count = 1, + .batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool, + perf_query_pass), + .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags, + .rsvd1 = device->context_id, + }; + + int ret = queue->device->info->no_hw ? 0 : + anv_gem_execbuffer(queue->device, &query_pass_execbuf); + if (ret) + result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m"); + } + + int ret = queue->device->info->no_hw ? 0 : + anv_gem_execbuffer(queue->device, &execbuf.execbuf); + if (ret) + result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m"); + + if (result == VK_SUCCESS && queue->sync) { + result = vk_sync_wait(&device->vk, queue->sync, 0, + VK_SYNC_WAIT_COMPLETE, UINT64_MAX); + if (result != VK_SUCCESS) + result = vk_queue_set_lost(&queue->vk, "sync wait failed"); + } + + error: + anv_execbuf_finish(&execbuf); + + if (result == VK_SUCCESS && utrace_flush_data) + result = anv_queue_exec_utrace_locked(queue, utrace_flush_data); + + return result; +} + +VkResult +anv_i915_execute_simple_batch(struct anv_queue *queue, + struct anv_bo *batch_bo, + uint32_t batch_bo_size) +{ + struct anv_device *device = queue->device; + struct anv_execbuf execbuf = { + .alloc = &queue->device->vk.alloc, + .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, + }; + + VkResult result = anv_execbuf_add_bo(device, &execbuf, batch_bo, NULL, 0); + if (result != VK_SUCCESS) + return result; + + execbuf.execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) execbuf.objects, + .buffer_count = execbuf.bo_count, + .batch_start_offset = 0, + .batch_len = batch_bo_size, + .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC, + .rsvd1 = device->context_id, + .rsvd2 = 0, + }; + + if (anv_gem_execbuffer(device, &execbuf.execbuf)) { + result = vk_device_set_lost(&device->vk, "anv_gem_execbuffer failed: %m"); + goto fail; + } + + result = anv_device_wait(device, batch_bo, INT64_MAX); + if (result != VK_SUCCESS) + result = vk_device_set_lost(&device->vk, + "anv_device_wait failed: %m"); + +fail: + anv_execbuf_finish(&execbuf); + return result; +} diff --git a/src/intel/vulkan/i915/anv_batch_chain.h b/src/intel/vulkan/i915/anv_batch_chain.h new file mode 100644 index
00000000000..42024ae5d62 --- /dev/null +++ b/src/intel/vulkan/i915/anv_batch_chain.h @@ -0,0 +1,49 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include <stdint.h> + +#include "vulkan/vulkan_core.h" + +#include "vk_sync.h" + +struct anv_queue; +struct anv_bo; +struct anv_cmd_buffer; +struct anv_query_pool; + +VkResult anv_i915_execute_simple_batch(struct anv_queue *queue, + struct anv_bo *batch_bo, + uint32_t batch_bo_size); +VkResult +anv_i915_queue_exec_locked(struct anv_queue *queue, + uint32_t wait_count, + const struct vk_sync_wait *waits, + uint32_t cmd_buffer_count, + struct anv_cmd_buffer **cmd_buffers, + uint32_t signal_count, + const struct vk_sync_signal *signals, + struct anv_query_pool *perf_query_pool, + uint32_t perf_query_pass); diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index 54b09fdd878..f9e0e90031b 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -132,6 +132,8 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']], endforeach libanv_files = files( + 'i915/anv_batch_chain.c', + 'i915/anv_batch_chain.h', 'i915/anv_device.c', 'i915/anv_device.h', 'layers/anv_doom64.c',
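
For readers unfamiliar with the uapi trick that anv_execbuf_add_ext relies on: when I915_EXEC_USE_EXTENSIONS is set, the i915 kernel driver reinterprets the legacy cliprects_ptr field of drm_i915_gem_execbuffer2 as the head of a singly linked list of struct i915_user_extension nodes, which is how the timeline-fences extension gets attached above. The following is a minimal standalone sketch of that chaining, not part of the patch; it assumes the drm-uapi headers are on the include path, and chain_ext is a hypothetical helper mirroring the driver's static anv_execbuf_add_ext.

#include <stdint.h>
#include <string.h>
#include "drm-uapi/i915_drm.h"

/* Append a user extension to an execbuf's extension chain. Once
 * I915_EXEC_USE_EXTENSIONS is set, cliprects_ptr no longer points at
 * cliprects; it holds the first i915_user_extension node (or 0).
 */
static void
chain_ext(struct drm_i915_gem_execbuffer2 *eb,
          uint32_t name, struct i915_user_extension *ext)
{
   __u64 *iter = &eb->cliprects_ptr;

   eb->flags |= I915_EXEC_USE_EXTENSIONS;

   /* Walk to the tail of the chain (empty on first use). */
   while (*iter != 0)
      iter = &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;

   ext->name = name;
   *iter = (uintptr_t)ext;
}

int
main(void)
{
   struct drm_i915_gem_execbuffer2 eb;
   struct drm_i915_gem_execbuffer_ext_timeline_fences fences;

   memset(&eb, 0, sizeof(eb));
   memset(&fences, 0, sizeof(fences));

   /* In the driver, fence_count/handles_ptr/values_ptr are filled from
    * execbuf.syncobj_count, execbuf.syncobjs and execbuf.syncobj_values:
    * parallel arrays of drm_i915_gem_exec_fence and 64-bit timeline points.
    */
   chain_ext(&eb, DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, &fences.base);

   return 0;
}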