anv: move trtt submissions over to the anv_async_submit
We can remove a bunch of TRTT-specific code from the backends, as well as the manual submission tracking.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28975>
This commit is contained in:

committed by
Marge Bot

parent
1adafbddbd
commit
7da5b1caef
@@ -1668,37 +1668,6 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_queue_submit_trtt_batch(struct anv_sparse_submission *submit,
|
||||
struct anv_batch *batch)
|
||||
{
|
||||
struct anv_queue *queue = submit->queue;
|
||||
struct anv_device *device = queue->device;
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
uint32_t batch_size = align(batch->next - batch->start, 8);
|
||||
struct anv_trtt_batch_bo *trtt_bbo;
|
||||
result = anv_trtt_batch_bo_new(device, batch_size, &trtt_bbo);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
memcpy(trtt_bbo->bo->map, batch->start, trtt_bbo->size);
|
||||
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
|
||||
if (device->physical->memory.need_flush &&
|
||||
anv_bo_needs_host_cache_flush(trtt_bbo->bo->alloc_flags))
|
||||
intel_flush_range(trtt_bbo->bo->map, trtt_bbo->size);
|
||||
#endif
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_BATCH)) {
|
||||
intel_print_batch(queue->decoder, trtt_bbo->bo->map, trtt_bbo->bo->size,
|
||||
trtt_bbo->bo->offset, false);
|
||||
}
|
||||
|
||||
result = device->kmd_backend->execute_trtt_batch(submit, trtt_bbo);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
anv_cmd_buffer_clflush(struct anv_cmd_buffer **cmd_buffers,
|
||||
uint32_t num_cmd_buffers)
|
||||
|
@@ -3246,14 +3246,25 @@ anv_device_destroy_context_or_vm(struct anv_device *device)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
static VkResult
|
||||
anv_device_init_trtt(struct anv_device *device)
|
||||
{
|
||||
struct anv_trtt *trtt = &device->trtt;
|
||||
|
||||
VkResult result =
|
||||
vk_sync_create(&device->vk,
|
||||
&device->physical->sync_syncobj_type,
|
||||
VK_SYNC_IS_TIMELINE,
|
||||
0 /* initial_value */,
|
||||
&trtt->timeline);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
simple_mtx_init(&trtt->mutex, mtx_plain);
|
||||
|
||||
list_inithead(&trtt->in_flight_batches);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -3261,31 +3272,9 @@ anv_device_finish_trtt(struct anv_device *device)
|
||||
{
|
||||
struct anv_trtt *trtt = &device->trtt;
|
||||
|
||||
if (trtt->timeline_val > 0) {
|
||||
struct drm_syncobj_timeline_wait wait = {
|
||||
.handles = (uintptr_t)&trtt->timeline_handle,
|
||||
.points = (uintptr_t)&trtt->timeline_val,
|
||||
.timeout_nsec = INT64_MAX,
|
||||
.count_handles = 1,
|
||||
.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
|
||||
.first_signaled = false,
|
||||
};
|
||||
if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &wait))
|
||||
fprintf(stderr, "TR-TT syncobj wait failed!\n");
|
||||
anv_sparse_trtt_garbage_collect_batches(device, true);
|
||||
|
||||
list_for_each_entry_safe(struct anv_trtt_batch_bo, trtt_bbo,
|
||||
&trtt->in_flight_batches, link)
|
||||
anv_trtt_batch_bo_free(device, trtt_bbo);
|
||||
|
||||
}
|
||||
|
||||
if (trtt->timeline_handle > 0) {
|
||||
struct drm_syncobj_destroy destroy = {
|
||||
.handle = trtt->timeline_handle,
|
||||
};
|
||||
if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy))
|
||||
fprintf(stderr, "TR-TT syncobj destroy failed!\n");
|
||||
}
|
||||
vk_sync_destroy(&device->vk, trtt->timeline);
|
||||
|
||||
simple_mtx_destroy(&trtt->mutex);
|
||||
|
||||
@@ -3915,6 +3904,10 @@ VkResult anv_CreateDevice(
|
||||
}
|
||||
}
|
||||
|
||||
result = anv_device_init_trtt(device);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_companion_cmd_pool;
|
||||
|
||||
anv_device_init_blorp(device);
|
||||
|
||||
anv_device_init_border_colors(device);
|
||||
@@ -3929,8 +3922,6 @@ VkResult anv_CreateDevice(
|
||||
|
||||
anv_device_init_embedded_samplers(device);
|
||||
|
||||
anv_device_init_trtt(device);
|
||||
|
||||
BITSET_ONES(device->gfx_dirty_state);
|
||||
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_INDEX_BUFFER);
|
||||
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SO_DECL_LIST);
|
||||
@@ -3963,13 +3954,13 @@ VkResult anv_CreateDevice(
|
||||
|
||||
result = anv_genX(device->info, init_device_state)(device);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_companion_cmd_pool;
|
||||
goto fail_inits;
|
||||
|
||||
*pDevice = anv_device_to_handle(device);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail_companion_cmd_pool:
|
||||
fail_inits:
|
||||
anv_device_finish_trtt(device);
|
||||
anv_device_finish_embedded_samplers(device);
|
||||
anv_device_utrace_finish(device);
|
||||
@@ -3977,7 +3968,7 @@ VkResult anv_CreateDevice(
|
||||
anv_device_finish_rt_shaders(device);
|
||||
anv_device_finish_astc_emu(device);
|
||||
anv_device_finish_internal_kernels(device);
|
||||
|
||||
fail_companion_cmd_pool:
|
||||
if (device->info->verx10 >= 125) {
|
||||
vk_common_DestroyCommandPool(anv_device_to_handle(device),
|
||||
device->companion_rcs_cmd_pool, NULL);
|
||||
@@ -4089,6 +4080,7 @@ void anv_DestroyDevice(
|
||||
|
||||
struct anv_physical_device *pdevice = device->physical;
|
||||
|
||||
/* Do TRTT batch garbage collection before destroying queues. */
|
||||
anv_device_finish_trtt(device);
|
||||
|
||||
for (uint32_t i = 0; i < device->queue_count; i++)
|
||||
|
@@ -65,13 +65,6 @@ stub_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
|
||||
return VK_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
stub_execute_trtt_batch(struct anv_sparse_submission *submit,
|
||||
struct anv_trtt_batch_bo *trtt_bbo)
|
||||
{
|
||||
return VK_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
stub_queue_exec_locked(struct anv_queue *queue,
|
||||
uint32_t wait_count,
|
||||
@@ -180,7 +173,6 @@ const struct anv_kmd_backend *anv_stub_kmd_backend_get(void)
|
||||
.vm_bind_bo = stub_vm_bind_bo,
|
||||
.vm_unbind_bo = stub_vm_bind_bo,
|
||||
.execute_simple_batch = stub_execute_simple_batch,
|
||||
.execute_trtt_batch = stub_execute_trtt_batch,
|
||||
.queue_exec_locked = stub_queue_exec_locked,
|
||||
.queue_exec_async = stub_queue_exec_async,
|
||||
.bo_alloc_flags_to_bo_flags = stub_bo_alloc_flags_to_bo_flags,
|
||||
|
@@ -38,8 +38,10 @@
|
||||
|
||||
struct intel_sample_positions;
|
||||
struct intel_urb_config;
|
||||
struct anv_async_submit;
|
||||
struct anv_embedded_sampler;
|
||||
struct anv_pipeline_embedded_sampler_binding;
|
||||
struct anv_trtt_bind;
|
||||
|
||||
typedef struct nir_builder nir_builder;
|
||||
typedef struct nir_shader nir_shader;
|
||||
@@ -351,9 +353,16 @@ genX(simple_shader_push_state_address)(struct anv_simple_shader *state,
|
||||
void
|
||||
genX(emit_simple_shader_end)(struct anv_simple_shader *state);
|
||||
|
||||
VkResult genX(init_trtt_context_state)(struct anv_queue *queue);
|
||||
VkResult genX(init_trtt_context_state)(struct anv_device *device,
|
||||
struct anv_async_submit *submit);
|
||||
|
||||
VkResult genX(write_trtt_entries)(struct anv_trtt_submission *submit);
|
||||
void genX(write_trtt_entries)(struct anv_async_submit *submit,
|
||||
struct anv_trtt_bind *l3l2_binds,
|
||||
uint32_t n_l3l2_binds,
|
||||
struct anv_trtt_bind *l1_binds,
|
||||
uint32_t n_l1_binds);
|
||||
|
||||
void genX(async_submit_end)(struct anv_async_submit *submit);
|
||||
|
||||
void
|
||||
genX(cmd_buffer_emit_push_descriptor_buffer_surface)(struct anv_cmd_buffer *cmd_buffer,
|
||||
|
@@ -40,7 +40,6 @@ struct anv_query_pool;
|
||||
struct anv_async_submit;
|
||||
struct anv_utrace_submit;
|
||||
struct anv_sparse_submission;
|
||||
struct anv_trtt_batch_bo;
|
||||
|
||||
enum anv_vm_bind_op {
|
||||
/* bind vma specified in anv_vm_bind */
|
||||
@@ -113,8 +112,6 @@ struct anv_kmd_backend {
|
||||
bool is_companion_rcs_batch);
|
||||
/* The caller is expected to hold device->mutex when calling this vfunc.
|
||||
*/
|
||||
VkResult (*execute_trtt_batch)(struct anv_sparse_submission *submit,
|
||||
struct anv_trtt_batch_bo *trtt_bbo);
|
||||
VkResult (*queue_exec_locked)(struct anv_queue *queue,
|
||||
uint32_t wait_count,
|
||||
const struct vk_sync_wait *waits,
|
||||
|
@@ -768,35 +768,6 @@ struct anv_state_stream {
|
||||
struct util_dynarray all_blocks;
|
||||
};
|
||||
|
||||
/* A batch of sparse (un)bind operations together with the synchronization
 * that surrounds them.
 */
struct anv_sparse_submission {
   /* Queue the submission is executed on. */
   struct anv_queue *queue;

   /* Array of bind operations; binds_len entries are in use. */
   struct anv_vm_bind *binds;
   int binds_len;
   /* NOTE(review): presumably the allocated size of binds — confirm. */
   int binds_capacity;

   /* Number of entries in waits and signals respectively. */
   uint32_t wait_count;
   uint32_t signal_count;

   struct vk_sync_wait *waits;
   struct vk_sync_signal *signals;
};

/* One pending TR-TT page table write. */
struct anv_trtt_bind {
   /* Address of the page table entry to write. */
   uint64_t pte_addr;
   /* Address to store in that entry. */
   uint64_t entry_addr;
};

/* The TR-TT page table writes generated for one sparse submission. */
struct anv_trtt_submission {
   /* Sparse submission these writes were generated for. */
   struct anv_sparse_submission *sparse;

   /* Pending writes to L3/L2 entries and to L1 entries. */
   struct anv_trtt_bind *l3l2_binds;
   struct anv_trtt_bind *l1_binds;

   /* Number of valid entries in the arrays above. */
   int l3l2_binds_len;
   int l1_binds_len;
};
|
||||
|
||||
/* The block_pool functions exported for testing only. The block pool should
|
||||
* only be used via a state pool (see below).
|
||||
*/
|
||||
@@ -1788,19 +1759,6 @@ struct anv_device_astc_emu {
|
||||
VkPipeline pipeline;
|
||||
};
|
||||
|
||||
struct anv_trtt_batch_bo {
|
||||
struct anv_bo *bo;
|
||||
uint32_t size;
|
||||
|
||||
/* Once device->trtt.timeline_handle signals timeline_val as complete we
|
||||
* can free this struct and its members.
|
||||
*/
|
||||
uint64_t timeline_val;
|
||||
|
||||
/* Part of device->trtt.in_flight_batches. */
|
||||
struct list_head link;
|
||||
};
|
||||
|
||||
struct anv_device {
|
||||
struct vk_device vk;
|
||||
|
||||
@@ -2028,12 +1986,11 @@ struct anv_device {
|
||||
struct anv_bo *cur_page_table_bo;
|
||||
uint64_t next_page_table_bo_offset;
|
||||
|
||||
/* Timeline syncobj used to track completion of the TR-TT batch BOs. */
|
||||
uint32_t timeline_handle;
|
||||
struct vk_sync *timeline;
|
||||
uint64_t timeline_val;
|
||||
|
||||
/* List of struct anv_trtt_batch_bo batches that are in flight and can
|
||||
* be freed once their timeline gets signaled.
|
||||
/* List of struct anv_trtt_submission that are in flight and can be
|
||||
* freed once their vk_sync gets signaled.
|
||||
*/
|
||||
struct list_head in_flight_batches;
|
||||
} trtt;
|
||||
@@ -2203,17 +2160,6 @@ VkResult anv_queue_submit(struct vk_queue *queue,
|
||||
VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
|
||||
struct anv_batch *batch,
|
||||
bool is_companion_rcs_batch);
|
||||
VkResult anv_queue_submit_trtt_batch(struct anv_sparse_submission *submit,
|
||||
struct anv_batch *batch);
|
||||
|
||||
static inline void
|
||||
anv_trtt_batch_bo_free(struct anv_device *device,
|
||||
struct anv_trtt_batch_bo *trtt_bbo)
|
||||
{
|
||||
anv_bo_pool_free(&device->batch_bo_pool, trtt_bbo->bo);
|
||||
list_del(&trtt_bbo->link);
|
||||
vk_free(&device->vk.alloc, trtt_bbo);
|
||||
}
|
||||
|
||||
void anv_queue_trace(struct anv_queue *queue, const char *label,
|
||||
bool frame, bool begin);
|
||||
@@ -2521,6 +2467,32 @@ anv_async_submit_done(struct anv_async_submit *submit);
|
||||
bool
|
||||
anv_async_submit_wait(struct anv_async_submit *submit);
|
||||
|
||||
struct anv_sparse_submission {
|
||||
struct anv_queue *queue;
|
||||
|
||||
struct anv_vm_bind *binds;
|
||||
int binds_len;
|
||||
int binds_capacity;
|
||||
|
||||
uint32_t wait_count;
|
||||
uint32_t signal_count;
|
||||
|
||||
struct vk_sync_wait *waits;
|
||||
struct vk_sync_signal *signals;
|
||||
};
|
||||
|
||||
struct anv_trtt_bind {
|
||||
uint64_t pte_addr;
|
||||
uint64_t entry_addr;
|
||||
};
|
||||
|
||||
struct anv_trtt_submission {
|
||||
struct anv_async_submit base;
|
||||
|
||||
struct anv_sparse_submission *sparse;
|
||||
|
||||
struct list_head link;
|
||||
};
|
||||
|
||||
struct anv_device_memory {
|
||||
struct vk_device_memory vk;
|
||||
@@ -3217,6 +3189,9 @@ VkResult anv_sparse_bind_image_memory(struct anv_queue *queue,
|
||||
VkResult anv_sparse_bind(struct anv_device *device,
|
||||
struct anv_sparse_submission *sparse_submit);
|
||||
|
||||
VkResult anv_sparse_trtt_garbage_collect_batches(struct anv_device *device,
|
||||
bool wait_completion);
|
||||
|
||||
VkSparseImageFormatProperties
|
||||
anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
|
||||
VkImageAspectFlags aspect,
|
||||
@@ -3236,8 +3211,6 @@ VkResult anv_sparse_image_check_support(struct anv_physical_device *pdevice,
|
||||
VkSampleCountFlagBits samples,
|
||||
VkImageType type,
|
||||
VkFormat format);
|
||||
VkResult anv_trtt_batch_bo_new(struct anv_device *device, uint32_t batch_size,
|
||||
struct anv_trtt_batch_bo **out_trtt_bbo);
|
||||
|
||||
struct anv_buffer {
|
||||
struct vk_buffer vk;
|
||||
|
@@ -396,20 +396,11 @@ trtt_get_page_table_bo(struct anv_device *device, struct anv_bo **bo,
|
||||
}
|
||||
|
||||
static VkResult
|
||||
anv_trtt_init_context_state(struct anv_queue *queue)
|
||||
anv_trtt_init_context_state(struct anv_device *device,
|
||||
struct anv_async_submit *submit)
|
||||
{
|
||||
struct anv_device *device = queue->device;
|
||||
struct anv_trtt *trtt = &device->trtt;
|
||||
|
||||
struct drm_syncobj_create create = {
|
||||
.handle = 0,
|
||||
.flags = 0,
|
||||
};
|
||||
if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create))
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
assert(create.handle != 0);
|
||||
trtt->timeline_handle = create.handle;
|
||||
|
||||
struct anv_bo *l3_bo;
|
||||
VkResult result = trtt_get_page_table_bo(device, &l3_bo, &trtt->l3_addr);
|
||||
if (result != VK_SUCCESS)
|
||||
@@ -430,7 +421,7 @@ anv_trtt_init_context_state(struct anv_queue *queue)
|
||||
goto fail_free_l3;
|
||||
}
|
||||
|
||||
result = anv_genX(device->info, init_trtt_context_state)(queue);
|
||||
result = anv_genX(device->info, init_trtt_context_state)(device, submit);
|
||||
|
||||
return result;
|
||||
|
||||
@@ -439,17 +430,6 @@ fail_free_l3:
|
||||
return result;
|
||||
}
|
||||
|
||||
static void
|
||||
anv_trtt_bind_list_add_entry(struct anv_trtt_bind *binds, int *binds_len,
|
||||
uint64_t pte_addr, uint64_t entry_addr)
|
||||
{
|
||||
binds[*binds_len] = (struct anv_trtt_bind) {
|
||||
.pte_addr = pte_addr,
|
||||
.entry_addr = entry_addr,
|
||||
};
|
||||
(*binds_len)++;
|
||||
}
|
||||
|
||||
/* For L3 and L2 pages, null and invalid entries are indicated by bits 1 and 0
|
||||
* respectively. For L1 entries, the hardware compares the addresses against
|
||||
* what we program to the GFX_TRTT_NULL and GFX_TRTT_INVAL registers.
|
||||
@@ -457,13 +437,27 @@ anv_trtt_bind_list_add_entry(struct anv_trtt_bind *binds, int *binds_len,
|
||||
#define ANV_TRTT_L3L2_NULL_ENTRY (1 << 1)
|
||||
#define ANV_TRTT_L3L2_INVALID_ENTRY (1 << 0)
|
||||
|
||||
static void
|
||||
anv_trtt_bind_list_add_entry(struct anv_trtt_bind *binds, uint32_t *binds_len,
|
||||
uint64_t pte_addr, uint64_t entry_addr)
|
||||
{
|
||||
binds[*binds_len] = (struct anv_trtt_bind) {
|
||||
.pte_addr = pte_addr,
|
||||
.entry_addr = entry_addr,
|
||||
};
|
||||
(*binds_len)++;
|
||||
}
|
||||
|
||||
/* Adds elements to the anv_trtt_bind structs passed. This doesn't write the
|
||||
* entries to the HW yet.
|
||||
*/
|
||||
static VkResult
|
||||
anv_trtt_bind_add(struct anv_device *device,
|
||||
uint64_t trtt_addr, uint64_t dest_addr,
|
||||
struct anv_trtt_submission *s)
|
||||
struct anv_trtt_bind *l3l2_binds,
|
||||
uint32_t *n_l3l2_binds,
|
||||
struct anv_trtt_bind *l1_binds,
|
||||
uint32_t *n_l1_binds)
|
||||
{
|
||||
VkResult result = VK_SUCCESS;
|
||||
struct anv_trtt *trtt = &device->trtt;
|
||||
@@ -480,9 +474,10 @@ anv_trtt_bind_add(struct anv_device *device,
|
||||
if (is_null_bind) {
|
||||
trtt->l3_mirror[l3_index] = ANV_TRTT_L3L2_NULL_ENTRY;
|
||||
|
||||
anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len,
|
||||
trtt->l3_addr + l3_index * sizeof(uint64_t),
|
||||
ANV_TRTT_L3L2_NULL_ENTRY);
|
||||
anv_trtt_bind_list_add_entry(l3l2_binds, n_l3l2_binds,
|
||||
trtt->l3_addr + l3_index *
|
||||
sizeof(uint64_t),
|
||||
ANV_TRTT_L3L2_NULL_ENTRY);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
@@ -494,8 +489,9 @@ anv_trtt_bind_add(struct anv_device *device,
|
||||
|
||||
trtt->l3_mirror[l3_index] = l2_addr;
|
||||
|
||||
anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len,
|
||||
trtt->l3_addr + l3_index * sizeof(uint64_t), l2_addr);
|
||||
anv_trtt_bind_list_add_entry(l3l2_binds, n_l3l2_binds,
|
||||
trtt->l3_addr + l3_index *
|
||||
sizeof(uint64_t), l2_addr);
|
||||
}
|
||||
assert(l2_addr != 0 && l2_addr != ANV_TRTT_L3L2_NULL_ENTRY);
|
||||
|
||||
@@ -508,9 +504,9 @@ anv_trtt_bind_add(struct anv_device *device,
|
||||
trtt->l2_mirror[l3_index * 512 + l2_index] =
|
||||
ANV_TRTT_L3L2_NULL_ENTRY;
|
||||
|
||||
anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len,
|
||||
l2_addr + l2_index * sizeof(uint64_t),
|
||||
ANV_TRTT_L3L2_NULL_ENTRY);
|
||||
anv_trtt_bind_list_add_entry(l3l2_binds, n_l3l2_binds,
|
||||
l2_addr + l2_index * sizeof(uint64_t),
|
||||
ANV_TRTT_L3L2_NULL_ENTRY);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
@@ -522,13 +518,65 @@ anv_trtt_bind_add(struct anv_device *device,
|
||||
|
||||
trtt->l2_mirror[l3_index * 512 + l2_index] = l1_addr;
|
||||
|
||||
anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len,
|
||||
l2_addr + l2_index * sizeof(uint64_t), l1_addr);
|
||||
anv_trtt_bind_list_add_entry(l3l2_binds, n_l3l2_binds,
|
||||
l2_addr + l2_index * sizeof(uint64_t),
|
||||
l1_addr);
|
||||
}
|
||||
assert(l1_addr != 0 && l1_addr != ANV_TRTT_L3L2_NULL_ENTRY);
|
||||
|
||||
anv_trtt_bind_list_add_entry(s->l1_binds, &s->l1_binds_len,
|
||||
l1_addr + l1_index * sizeof(uint32_t), dest_addr);
|
||||
anv_trtt_bind_list_add_entry(l1_binds, n_l1_binds,
|
||||
l1_addr + l1_index * sizeof(uint32_t),
|
||||
dest_addr);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_sparse_trtt_garbage_collect_batches(struct anv_device *device,
|
||||
bool wait_completion)
|
||||
{
|
||||
struct anv_trtt *trtt = &device->trtt;
|
||||
|
||||
uint64_t last_value;
|
||||
if (!wait_completion) {
|
||||
VkResult result =
|
||||
vk_sync_get_value(&device->vk, trtt->timeline, &last_value);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
} else {
|
||||
last_value = trtt->timeline_val;
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(struct anv_trtt_submission, submit,
|
||||
&trtt->in_flight_batches, link) {
|
||||
if (submit->base.signal.signal_value <= last_value) {
|
||||
list_del(&submit->link);
|
||||
anv_async_submit_fini(&submit->base);
|
||||
vk_free(&device->vk.alloc, submit);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!wait_completion)
|
||||
break;
|
||||
|
||||
VkResult result = vk_sync_wait(
|
||||
&device->vk,
|
||||
submit->base.signal.sync,
|
||||
submit->base.signal.signal_value,
|
||||
VK_SYNC_WAIT_COMPLETE,
|
||||
os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
|
||||
if (result == VK_SUCCESS) {
|
||||
list_del(&submit->link);
|
||||
anv_async_submit_fini(&submit->base);
|
||||
vk_free(&device->vk.alloc, submit);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If the wait failed but the caller wanted completion, return the
|
||||
* error.
|
||||
*/
|
||||
return result;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
@@ -545,6 +593,35 @@ anv_sparse_bind_trtt(struct anv_device *device,
|
||||
if (!sparse_submit->queue)
|
||||
sparse_submit->queue = trtt->queue;
|
||||
|
||||
struct anv_trtt_submission *submit =
|
||||
vk_zalloc(&device->vk.alloc, sizeof(*submit), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
if (submit == NULL)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
result = anv_async_submit_init(&submit->base, sparse_submit->queue,
|
||||
&device->batch_bo_pool,
|
||||
false, false);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_async;
|
||||
|
||||
simple_mtx_lock(&trtt->mutex);
|
||||
|
||||
anv_sparse_trtt_garbage_collect_batches(device, false);
|
||||
|
||||
submit->base.signal = (struct vk_sync_signal) {
|
||||
.sync = trtt->timeline,
|
||||
.signal_value = ++trtt->timeline_val,
|
||||
};
|
||||
|
||||
/* If the TRTT L3 table was never set, initialize it as part of this
|
||||
* submission.
|
||||
*/
|
||||
if (!trtt->l3_addr)
|
||||
anv_trtt_init_context_state(device, &submit->base);
|
||||
|
||||
assert(trtt->l3_addr);
|
||||
|
||||
/* These capacities are conservative estimations. For L1 binds the
|
||||
* number will match exactly unless we skip NULL binds due to L2 already
|
||||
* being NULL. For L3/L2 things are harder to estimate, but the resulting
|
||||
@@ -561,26 +638,15 @@ anv_sparse_bind_trtt(struct anv_device *device,
|
||||
l3l2_binds_capacity += (pages / 1024 + 1) * 2;
|
||||
}
|
||||
|
||||
/* Turn a series of virtual address maps, into a list of L3/L2/L1 TRTT page
|
||||
* table updates.
|
||||
*/
|
||||
STACK_ARRAY(struct anv_trtt_bind, l3l2_binds, l3l2_binds_capacity);
|
||||
STACK_ARRAY(struct anv_trtt_bind, l1_binds, l1_binds_capacity);
|
||||
struct anv_trtt_submission trtt_submit = {
|
||||
.sparse = sparse_submit,
|
||||
.l3l2_binds = l3l2_binds,
|
||||
.l1_binds = l1_binds,
|
||||
.l3l2_binds_len = 0,
|
||||
.l1_binds_len = 0,
|
||||
};
|
||||
|
||||
simple_mtx_lock(&trtt->mutex);
|
||||
|
||||
if (!trtt->l3_addr)
|
||||
anv_trtt_init_context_state(sparse_submit->queue);
|
||||
|
||||
assert(trtt->l3_addr);
|
||||
|
||||
for (int b = 0; b < sparse_submit->binds_len; b++) {
|
||||
uint32_t n_l3l2_binds = 0, n_l1_binds = 0;
|
||||
for (int b = 0; b < sparse_submit->binds_len && result == VK_SUCCESS; b++) {
|
||||
struct anv_vm_bind *vm_bind = &sparse_submit->binds[b];
|
||||
for (size_t i = 0; i < vm_bind->size; i += 64 * 1024) {
|
||||
for (size_t i = 0; i < vm_bind->size && result == VK_SUCCESS; i += 64 * 1024) {
|
||||
uint64_t trtt_addr = vm_bind->address + i;
|
||||
uint64_t dest_addr =
|
||||
(vm_bind->op == ANV_VM_BIND && vm_bind->bo) ?
|
||||
@@ -588,29 +654,74 @@ anv_sparse_bind_trtt(struct anv_device *device,
|
||||
ANV_TRTT_L1_NULL_TILE_VAL;
|
||||
|
||||
result = anv_trtt_bind_add(device, trtt_addr, dest_addr,
|
||||
&trtt_submit);
|
||||
if (result != VK_SUCCESS)
|
||||
goto out;
|
||||
l3l2_binds, &n_l3l2_binds,
|
||||
l1_binds, &n_l1_binds);
|
||||
}
|
||||
}
|
||||
|
||||
assert(trtt_submit.l3l2_binds_len <= l3l2_binds_capacity);
|
||||
assert(trtt_submit.l1_binds_len <= l1_binds_capacity);
|
||||
assert(n_l3l2_binds <= l3l2_binds_capacity);
|
||||
assert(n_l1_binds <= l1_binds_capacity);
|
||||
|
||||
sparse_debug("trtt_binds: num_vm_binds:%02d l3l2:%04d l1:%04d\n",
|
||||
sparse_submit->binds_len, trtt_submit.l3l2_binds_len,
|
||||
trtt_submit.l1_binds_len);
|
||||
/* Convert the L3/L2/L1 TRTT page table updates in anv_trtt_bind elements
|
||||
* into MI commands.
|
||||
*/
|
||||
if (result == VK_SUCCESS) {
|
||||
sparse_debug("trtt_binds: num_vm_binds:%02d l3l2:%04d l1:%04d\n",
|
||||
sparse_submit->binds_len, n_l3l2_binds, n_l1_binds);
|
||||
|
||||
if (trtt_submit.l3l2_binds_len || trtt_submit.l1_binds_len)
|
||||
result = anv_genX(device->info, write_trtt_entries)(&trtt_submit);
|
||||
if (n_l3l2_binds || n_l1_binds) {
|
||||
anv_genX(device->info, write_trtt_entries)(
|
||||
&submit->base, l3l2_binds, n_l3l2_binds, l1_binds, n_l1_binds);
|
||||
}
|
||||
}
|
||||
|
||||
if (result == VK_SUCCESS)
|
||||
ANV_RMV(vm_binds, device, sparse_submit->binds, sparse_submit->binds_len);
|
||||
|
||||
out:
|
||||
simple_mtx_unlock(&trtt->mutex);
|
||||
STACK_ARRAY_FINISH(l1_binds);
|
||||
STACK_ARRAY_FINISH(l3l2_binds);
|
||||
|
||||
anv_genX(device->info, async_submit_end)(&submit->base);
|
||||
|
||||
if (submit->base.batch.status != VK_SUCCESS) {
|
||||
result = submit->base.batch.status;
|
||||
goto error_add_bind;
|
||||
}
|
||||
|
||||
/* Add all the BOs backing TRTT page tables to the reloc list.
|
||||
*
|
||||
* TODO: we could narrow down the list by using anv_address structures in
|
||||
* anv_trtt_bind for the pte_addr.
|
||||
*/
|
||||
if (device->physical->uses_relocs) {
|
||||
for (int i = 0; i < trtt->num_page_table_bos; i++) {
|
||||
result = anv_reloc_list_add_bo(&submit->base.relocs,
|
||||
trtt->page_table_bos[i]);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_add_bind;
|
||||
}
|
||||
}
|
||||
|
||||
result =
|
||||
device->kmd_backend->queue_exec_async(&submit->base,
|
||||
sparse_submit->wait_count,
|
||||
sparse_submit->waits,
|
||||
sparse_submit->signal_count,
|
||||
sparse_submit->signals);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_add_bind;
|
||||
|
||||
|
||||
list_addtail(&submit->link, &trtt->in_flight_batches);
|
||||
|
||||
simple_mtx_unlock(&trtt->mutex);
|
||||
|
||||
ANV_RMV(vm_binds, device, sparse_submit->binds, sparse_submit->binds_len);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
error_add_bind:
|
||||
simple_mtx_unlock(&trtt->mutex);
|
||||
anv_async_submit_fini(&submit->base);
|
||||
error_async:
|
||||
vk_free(&device->vk.alloc, submit);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -1299,65 +1410,3 @@ anv_sparse_image_check_support(struct anv_physical_device *pdevice,
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
anv_trtt_garbage_collect_batches(struct anv_device *device)
|
||||
{
|
||||
struct anv_trtt *trtt = &device->trtt;
|
||||
|
||||
if (trtt->timeline_val % 8 != 7)
|
||||
return VK_SUCCESS;
|
||||
|
||||
uint64_t cur_timeline_val = 0;
|
||||
struct drm_syncobj_timeline_array array = {
|
||||
.handles = (uintptr_t)&trtt->timeline_handle,
|
||||
.points = (uintptr_t)&cur_timeline_val,
|
||||
.count_handles = 1,
|
||||
.flags = 0,
|
||||
};
|
||||
if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_QUERY, &array))
|
||||
return vk_error(device, VK_ERROR_UNKNOWN);
|
||||
|
||||
list_for_each_entry_safe(struct anv_trtt_batch_bo, trtt_bbo,
|
||||
&trtt->in_flight_batches, link) {
|
||||
if (trtt_bbo->timeline_val > cur_timeline_val)
|
||||
return VK_SUCCESS;
|
||||
|
||||
anv_trtt_batch_bo_free(device, trtt_bbo);
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_trtt_batch_bo_new(struct anv_device *device, uint32_t batch_size,
|
||||
struct anv_trtt_batch_bo **out_trtt_bbo)
|
||||
{
|
||||
struct anv_trtt *trtt = &device->trtt;
|
||||
VkResult result;
|
||||
|
||||
anv_trtt_garbage_collect_batches(device);
|
||||
|
||||
struct anv_trtt_batch_bo *trtt_bbo =
|
||||
vk_alloc(&device->vk.alloc, sizeof(*trtt_bbo), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
if (!trtt_bbo)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
result = anv_bo_pool_alloc(&device->batch_bo_pool, batch_size,
|
||||
&trtt_bbo->bo);
|
||||
if (result != VK_SUCCESS)
|
||||
goto out;
|
||||
|
||||
trtt_bbo->size = batch_size;
|
||||
trtt_bbo->timeline_val = ++trtt->timeline_val;
|
||||
|
||||
list_addtail(&trtt_bbo->link, &trtt->in_flight_batches);
|
||||
|
||||
*out_trtt_bbo = trtt_bbo;
|
||||
|
||||
return VK_SUCCESS;
|
||||
out:
|
||||
vk_free(&device->vk.alloc, trtt_bbo);
|
||||
return result;
|
||||
}
|
||||
|
@@ -6094,22 +6094,17 @@ genX(cmd_buffer_end_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer,
|
||||
#endif
|
||||
}
|
||||
|
||||
VkResult
|
||||
genX(write_trtt_entries)(struct anv_trtt_submission *submit)
|
||||
void
|
||||
genX(write_trtt_entries)(struct anv_async_submit *submit,
|
||||
struct anv_trtt_bind *l3l2_binds,
|
||||
uint32_t n_l3l2_binds,
|
||||
struct anv_trtt_bind *l1_binds,
|
||||
uint32_t n_l1_binds)
|
||||
{
|
||||
#if GFX_VER >= 12
|
||||
const struct intel_device_info *devinfo =
|
||||
submit->sparse->queue->device->info;
|
||||
|
||||
size_t batch_size = submit->l3l2_binds_len * 20 +
|
||||
submit->l1_binds_len * 16 +
|
||||
GENX(PIPE_CONTROL_length) * sizeof(uint32_t) + 8;
|
||||
STACK_ARRAY(uint32_t, cmds, batch_size);
|
||||
struct anv_batch batch = {
|
||||
.start = cmds,
|
||||
.next = cmds,
|
||||
.end = (void *)cmds + batch_size,
|
||||
};
|
||||
submit->queue->device->info;
|
||||
struct anv_batch *batch = &submit->batch;
|
||||
|
||||
/* BSpec says:
|
||||
* "DWord Length programmed must not exceed 0x3FE."
|
||||
@@ -6127,90 +6122,86 @@ genX(write_trtt_entries)(struct anv_trtt_submission *submit)
|
||||
* contiguous addresses.
|
||||
*/
|
||||
|
||||
for (int i = 0; i < submit->l3l2_binds_len; i++) {
|
||||
for (uint32_t i = 0; i < n_l3l2_binds; i++) {
|
||||
int extra_writes = 0;
|
||||
for (int j = i + 1;
|
||||
j < submit->l3l2_binds_len &&
|
||||
extra_writes <= max_qword_extra_writes;
|
||||
for (uint32_t j = i + 1;
|
||||
j < n_l3l2_binds && extra_writes <= max_qword_extra_writes;
|
||||
j++) {
|
||||
if (submit->l3l2_binds[i].pte_addr + (j - i) * 8 ==
|
||||
submit->l3l2_binds[j].pte_addr) {
|
||||
if (l3l2_binds[i].pte_addr + (j - i) * 8 == l3l2_binds[j].pte_addr) {
|
||||
extra_writes++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
bool is_last_write = submit->l1_binds_len == 0 &&
|
||||
i + extra_writes + 1 == submit->l3l2_binds_len;
|
||||
bool is_last_write = n_l1_binds == 0 &&
|
||||
i + extra_writes + 1 == n_l3l2_binds;
|
||||
|
||||
uint32_t total_len = GENX(MI_STORE_DATA_IMM_length_bias) +
|
||||
qword_write_len + (extra_writes * 2);
|
||||
uint32_t *dw;
|
||||
dw = anv_batch_emitn(&batch, total_len, GENX(MI_STORE_DATA_IMM),
|
||||
dw = anv_batch_emitn(batch, total_len, GENX(MI_STORE_DATA_IMM),
|
||||
.ForceWriteCompletionCheck = is_last_write,
|
||||
.StoreQword = true,
|
||||
.Address = anv_address_from_u64(submit->l3l2_binds[i].pte_addr),
|
||||
.Address = anv_address_from_u64(l3l2_binds[i].pte_addr),
|
||||
);
|
||||
dw += 3;
|
||||
for (int j = 0; j < extra_writes + 1; j++) {
|
||||
uint64_t entry_addr_64b = submit->l3l2_binds[i + j].entry_addr;
|
||||
for (uint32_t j = 0; j < extra_writes + 1; j++) {
|
||||
uint64_t entry_addr_64b = l3l2_binds[i + j].entry_addr;
|
||||
*dw = entry_addr_64b & 0xFFFFFFFF;
|
||||
dw++;
|
||||
*dw = (entry_addr_64b >> 32) & 0xFFFFFFFF;
|
||||
dw++;
|
||||
}
|
||||
assert(dw == batch.next);
|
||||
assert(dw == batch->next);
|
||||
|
||||
i += extra_writes;
|
||||
}
|
||||
|
||||
for (int i = 0; i < submit->l1_binds_len; i++) {
|
||||
for (uint32_t i = 0; i < n_l1_binds; i++) {
|
||||
int extra_writes = 0;
|
||||
for (int j = i + 1;
|
||||
j < submit->l1_binds_len && extra_writes <= max_dword_extra_writes;
|
||||
for (uint32_t j = i + 1;
|
||||
j < n_l1_binds && extra_writes <= max_dword_extra_writes;
|
||||
j++) {
|
||||
if (submit->l1_binds[i].pte_addr + (j - i) * 4 ==
|
||||
submit->l1_binds[j].pte_addr) {
|
||||
if (l1_binds[i].pte_addr + (j - i) * 4 ==
|
||||
l1_binds[j].pte_addr) {
|
||||
extra_writes++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool is_last_write = i + extra_writes + 1 == submit->l1_binds_len;
|
||||
bool is_last_write = i + extra_writes + 1 == n_l1_binds;
|
||||
|
||||
uint32_t total_len = GENX(MI_STORE_DATA_IMM_length_bias) +
|
||||
dword_write_len + extra_writes;
|
||||
uint32_t *dw;
|
||||
dw = anv_batch_emitn(&batch, total_len, GENX(MI_STORE_DATA_IMM),
|
||||
dw = anv_batch_emitn(batch, total_len, GENX(MI_STORE_DATA_IMM),
|
||||
.ForceWriteCompletionCheck = is_last_write,
|
||||
.Address = anv_address_from_u64(submit->l1_binds[i].pte_addr),
|
||||
.Address = anv_address_from_u64(l1_binds[i].pte_addr),
|
||||
);
|
||||
dw += 3;
|
||||
for (int j = 0; j < extra_writes + 1; j++) {
|
||||
*dw = (submit->l1_binds[i + j].entry_addr >> 16) & 0xFFFFFFFF;
|
||||
for (uint32_t j = 0; j < extra_writes + 1; j++) {
|
||||
*dw = (l1_binds[i + j].entry_addr >> 16) & 0xFFFFFFFF;
|
||||
dw++;
|
||||
}
|
||||
assert(dw == batch.next);
|
||||
assert(dw == batch->next);
|
||||
|
||||
i += extra_writes;
|
||||
}
|
||||
|
||||
genx_batch_emit_pipe_control(&batch, devinfo, _3D,
|
||||
genx_batch_emit_pipe_control(batch, devinfo, _3D,
|
||||
ANV_PIPE_CS_STALL_BIT |
|
||||
ANV_PIPE_TLB_INVALIDATE_BIT);
|
||||
|
||||
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
|
||||
|
||||
assert(batch.next <= batch.end);
|
||||
|
||||
VkResult result = anv_queue_submit_trtt_batch(submit->sparse, &batch);
|
||||
STACK_ARRAY_FINISH(cmds);
|
||||
|
||||
return result;
|
||||
|
||||
#else
|
||||
unreachable("Not implemented");
|
||||
#endif
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
genX(async_submit_end)(struct anv_async_submit *submit)
|
||||
{
|
||||
struct anv_batch *batch = &submit->batch;
|
||||
anv_batch_emit(batch, GENX(MI_BATCH_BUFFER_END), bbe);
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -1396,31 +1396,25 @@ genX(apply_task_urb_workaround)(struct anv_cmd_buffer *cmd_buffer)
|
||||
}
|
||||
|
||||
VkResult
|
||||
genX(init_trtt_context_state)(struct anv_queue *queue)
|
||||
genX(init_trtt_context_state)(struct anv_device *device,
|
||||
struct anv_async_submit *submit)
|
||||
{
|
||||
#if GFX_VER >= 12
|
||||
struct anv_device *device = queue->device;
|
||||
struct anv_trtt *trtt = &device->trtt;
|
||||
struct anv_batch *batch = &submit->batch;
|
||||
|
||||
uint32_t cmds[128];
|
||||
struct anv_batch batch = {
|
||||
.start = cmds,
|
||||
.next = cmds,
|
||||
.end = (void *)cmds + sizeof(cmds),
|
||||
};
|
||||
|
||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_INVAL), trtt_inval) {
|
||||
anv_batch_write_reg(batch, GENX(GFX_TRTT_INVAL), trtt_inval) {
|
||||
trtt_inval.InvalidTileDetectionValue = ANV_TRTT_L1_INVALID_TILE_VAL;
|
||||
}
|
||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_NULL), trtt_null) {
|
||||
anv_batch_write_reg(batch, GENX(GFX_TRTT_NULL), trtt_null) {
|
||||
trtt_null.NullTileDetectionValue = ANV_TRTT_L1_NULL_TILE_VAL;
|
||||
}
|
||||
#if GFX_VER >= 20
|
||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_VA_RANGE), trtt_va_range) {
|
||||
anv_batch_write_reg(batch, GENX(GFX_TRTT_VA_RANGE), trtt_va_range) {
|
||||
trtt_va_range.TRVABase = device->physical->va.trtt.addr >> 44;
|
||||
}
|
||||
#else
|
||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_VA_RANGE), trtt_va_range) {
|
||||
anv_batch_write_reg(batch, GENX(GFX_TRTT_VA_RANGE), trtt_va_range) {
|
||||
trtt_va_range.TRVAMaskValue = 0xF;
|
||||
trtt_va_range.TRVADataValue = 0xF;
|
||||
}
|
||||
@@ -1428,28 +1422,24 @@ genX(init_trtt_context_state)(struct anv_queue *queue)
|
||||
|
||||
uint64_t l3_addr = trtt->l3_addr;
|
||||
assert((l3_addr & 0xFFF) == 0);
|
||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_L3_BASE_LOW), trtt_base_low) {
|
||||
anv_batch_write_reg(batch, GENX(GFX_TRTT_L3_BASE_LOW), trtt_base_low) {
|
||||
trtt_base_low.TRVAL3PointerLowerAddress =
|
||||
(l3_addr & 0xFFFFF000) >> 12;
|
||||
}
|
||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_L3_BASE_HIGH),
|
||||
anv_batch_write_reg(batch, GENX(GFX_TRTT_L3_BASE_HIGH),
|
||||
trtt_base_high) {
|
||||
trtt_base_high.TRVAL3PointerUpperAddress =
|
||||
(l3_addr >> 32) & 0xFFFF;
|
||||
}
|
||||
/* Enabling TR-TT needs to be done after setting up the other registers.
|
||||
*/
|
||||
anv_batch_write_reg(&batch, GENX(GFX_TRTT_CR), trtt_cr) {
|
||||
anv_batch_write_reg(batch, GENX(GFX_TRTT_CR), trtt_cr) {
|
||||
trtt_cr.TRTTEnable = true;
|
||||
}
|
||||
|
||||
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
|
||||
assert(batch.next <= batch.end);
|
||||
|
||||
VkResult res = anv_queue_submit_simple_batch(queue, &batch, false);
|
||||
if (res != VK_SUCCESS)
|
||||
return res;
|
||||
|
||||
genx_batch_emit_pipe_control(batch, device->info, _3D,
|
||||
ANV_PIPE_CS_STALL_BIT |
|
||||
ANV_PIPE_TLB_INVALIDATE_BIT);
|
||||
#endif
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
@@ -1051,105 +1051,3 @@ fail:
|
||||
anv_execbuf_finish(&execbuf);
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult
|
||||
i915_execute_trtt_batch(struct anv_sparse_submission *submit,
|
||||
struct anv_trtt_batch_bo *trtt_bbo)
|
||||
{
|
||||
struct anv_queue *queue = submit->queue;
|
||||
struct anv_device *device = queue->device;
|
||||
struct anv_trtt *trtt = &device->trtt;
|
||||
struct anv_execbuf execbuf = {
|
||||
.alloc = &device->vk.alloc,
|
||||
.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
|
||||
};
|
||||
VkResult result;
|
||||
|
||||
for (uint32_t i = 0; i < submit->wait_count; i++) {
|
||||
result = anv_execbuf_add_sync(device, &execbuf, submit->waits[i].sync,
|
||||
false /* is_signal */,
|
||||
submit->waits[i].wait_value);
|
||||
if (result != VK_SUCCESS)
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < submit->signal_count; i++) {
|
||||
result = anv_execbuf_add_sync(device, &execbuf, submit->signals[i].sync,
|
||||
true /* is_signal */,
|
||||
submit->signals[i].signal_value);
|
||||
if (result != VK_SUCCESS)
|
||||
goto out;
|
||||
}
|
||||
|
||||
result = anv_execbuf_add_syncobj(device, &execbuf, trtt->timeline_handle,
|
||||
I915_EXEC_FENCE_SIGNAL,
|
||||
trtt_bbo->timeline_val);
|
||||
if (result != VK_SUCCESS)
|
||||
goto out;
|
||||
|
||||
|
||||
result = anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL,
|
||||
0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto out;
|
||||
|
||||
for (int i = 0; i < trtt->num_page_table_bos; i++) {
|
||||
result = anv_execbuf_add_bo(device, &execbuf, trtt->page_table_bos[i],
|
||||
NULL, EXEC_OBJECT_WRITE);
|
||||
if (result != VK_SUCCESS)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (queue->sync) {
|
||||
result = anv_execbuf_add_sync(device, &execbuf, queue->sync,
|
||||
true /* is_signal */,
|
||||
0 /* signal_value */);
|
||||
if (result != VK_SUCCESS)
|
||||
goto out;
|
||||
}
|
||||
|
||||
result = anv_execbuf_add_bo(device, &execbuf, trtt_bbo->bo, NULL, 0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto out;
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_SUBMIT))
|
||||
anv_i915_debug_submit(&execbuf);
|
||||
|
||||
uint64_t exec_flags = 0;
|
||||
uint32_t context_id;
|
||||
get_context_and_exec_flags(queue, false, &exec_flags, &context_id);
|
||||
|
||||
execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
|
||||
.buffers_ptr = (uintptr_t) execbuf.objects,
|
||||
.buffer_count = execbuf.bo_count,
|
||||
.batch_start_offset = 0,
|
||||
.batch_len = trtt_bbo->size,
|
||||
.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | exec_flags,
|
||||
.rsvd1 = context_id,
|
||||
.rsvd2 = 0,
|
||||
};
|
||||
setup_execbuf_fence_params(&execbuf);
|
||||
|
||||
ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
|
||||
|
||||
int ret = queue->device->info->no_hw ? 0 :
|
||||
anv_gem_execbuffer(device, &execbuf.execbuf);
|
||||
if (ret) {
|
||||
result = vk_device_set_lost(&device->vk,
|
||||
"trtt anv_gem_execbuffer failed: %m");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (queue->sync) {
|
||||
result = vk_sync_wait(&device->vk, queue->sync, 0,
|
||||
VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
|
||||
if (result != VK_SUCCESS) {
|
||||
result = vk_queue_set_lost(&queue->vk, "trtt sync wait failed");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
anv_execbuf_finish(&execbuf);
|
||||
return result;
|
||||
}
|
||||
|
@@ -29,15 +29,12 @@
|
||||
|
||||
#include "vk_sync.h"
|
||||
|
||||
struct anv_device;
|
||||
struct anv_queue;
|
||||
struct anv_bo;
|
||||
struct anv_cmd_buffer;
|
||||
struct anv_query_pool;
|
||||
struct anv_async_submit;
|
||||
struct anv_utrace_submit;
|
||||
struct anv_sparse_submission;
|
||||
struct anv_trtt_batch_bo;
|
||||
|
||||
VkResult
|
||||
i915_queue_exec_async(struct anv_async_submit *submit,
|
||||
@@ -50,10 +47,6 @@ VkResult
|
||||
i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
|
||||
uint32_t batch_bo_size, bool is_companion_rcs_batch);
|
||||
|
||||
VkResult
|
||||
i915_execute_trtt_batch(struct anv_sparse_submission *submit,
|
||||
struct anv_trtt_batch_bo *trtt_bbo);
|
||||
|
||||
VkResult
|
||||
i915_queue_exec_locked(struct anv_queue *queue,
|
||||
uint32_t wait_count,
|
||||
|
@@ -297,7 +297,6 @@ anv_i915_kmd_backend_get(void)
|
||||
.vm_bind_bo = i915_vm_bind_bo,
|
||||
.vm_unbind_bo = i915_vm_bind_bo,
|
||||
.execute_simple_batch = i915_execute_simple_batch,
|
||||
.execute_trtt_batch = i915_execute_trtt_batch,
|
||||
.queue_exec_locked = i915_queue_exec_locked,
|
||||
.queue_exec_async = i915_queue_exec_async,
|
||||
.bo_alloc_flags_to_bo_flags = i915_bo_alloc_flags_to_bo_flags,
|
||||
|
@@ -183,58 +183,6 @@ xe_exec_print_debug(struct anv_queue *queue, uint32_t cmd_buffer_count,
|
||||
perf_query_pool, perf_query_pass);
|
||||
}
|
||||
|
||||
VkResult
|
||||
xe_execute_trtt_batch(struct anv_sparse_submission *submit,
|
||||
struct anv_trtt_batch_bo *trtt_bbo)
|
||||
{
|
||||
struct anv_queue *queue = submit->queue;
|
||||
struct anv_device *device = queue->device;
|
||||
struct anv_trtt *trtt = &device->trtt;
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
struct drm_xe_sync extra_sync = {
|
||||
.type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
|
||||
.flags = DRM_XE_SYNC_FLAG_SIGNAL,
|
||||
.handle = trtt->timeline_handle,
|
||||
.timeline_value = trtt_bbo->timeline_val,
|
||||
};
|
||||
|
||||
struct drm_xe_sync *xe_syncs = NULL;
|
||||
uint32_t xe_syncs_count = 0;
|
||||
result = xe_exec_process_syncs(queue, submit->wait_count, submit->waits,
|
||||
submit->signal_count, submit->signals,
|
||||
1, &extra_sync,
|
||||
NULL, /* utrace_submit */
|
||||
false, /* is_companion_rcs_queue */
|
||||
&xe_syncs, &xe_syncs_count);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
struct drm_xe_exec exec = {
|
||||
.exec_queue_id = queue->exec_queue_id,
|
||||
.num_syncs = xe_syncs_count,
|
||||
.syncs = (uintptr_t)xe_syncs,
|
||||
.address = trtt_bbo->bo->offset,
|
||||
.num_batch_buffer = 1,
|
||||
};
|
||||
|
||||
if (!device->info->no_hw) {
|
||||
if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec)) {
|
||||
result = vk_device_set_lost(&device->vk, "XE_EXEC failed: %m");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (queue->sync) {
|
||||
result = vk_sync_wait(&device->vk, queue->sync, 0,
|
||||
VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
|
||||
}
|
||||
|
||||
out:
|
||||
vk_free(&device->vk.alloc, xe_syncs);
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult
|
||||
xe_queue_exec_async(struct anv_async_submit *submit,
|
||||
uint32_t wait_count,
|
||||
|
@@ -36,17 +36,11 @@ struct anv_cmd_buffer;
|
||||
struct anv_query_pool;
|
||||
struct anv_async_submit;
|
||||
struct anv_utrace_submit;
|
||||
struct anv_sparse_submission;
|
||||
struct anv_trtt_batch_bo;
|
||||
|
||||
VkResult
|
||||
xe_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
|
||||
uint32_t batch_bo_size, bool is_companion_rcs_batch);
|
||||
|
||||
VkResult
|
||||
xe_execute_trtt_batch(struct anv_sparse_submission *submit,
|
||||
struct anv_trtt_batch_bo *trtt_bbo);
|
||||
|
||||
VkResult
|
||||
xe_queue_exec_locked(struct anv_queue *queue,
|
||||
uint32_t wait_count,
|
||||
|
@@ -346,7 +346,6 @@ anv_xe_kmd_backend_get(void)
|
||||
.vm_bind_bo = xe_vm_bind_bo,
|
||||
.vm_unbind_bo = xe_vm_unbind_bo,
|
||||
.execute_simple_batch = xe_execute_simple_batch,
|
||||
.execute_trtt_batch = xe_execute_trtt_batch,
|
||||
.queue_exec_locked = xe_queue_exec_locked,
|
||||
.queue_exec_async = xe_queue_exec_async,
|
||||
.bo_alloc_flags_to_bo_flags = xe_bo_alloc_flags_to_bo_flags,
|
||||
|
Reference in New Issue
Block a user