anv/trtt: make all contexts have the same TR-TT programming
On Gen12 (the oldest we support on Mesa right now for TR-TT) we started having per-engine TR-TT registers and we are supposed to make all contexts share the same TR-TT programming. On LNL+, this is documented in the BSpec page for the TRTT_CNTRL register (68417), with more details in HSDs 14020454786 and 16022013154. On Gen12 platforms this information is a little harder to find and there's a whole trail of HSDs leading up to 1209977595, which links to the documents that describe the programming. BSpec for TR-TT on Gen12 is very confusing as it still contains registers and other information from Gen11 that were not removed. Regarding the additional BLT and COMP registers, please notice that on the BSpec pages for the TR-TT registers, the "Register Instance" section only lists the GFX registers as non-privileged. However, the "User Mode Privileged Commands" lists the other instances of the TR-TT Registers as non-privileged, which matches what we see: there's no need to put these addresses in the FORCE_TO_NONPRIV registers. Notice that for now, when TR-TT is being used we only expose a single queue, so this change effectively does nothing until we start exposing extra queues. I left that part for later to help bisectability. v2: - s/trtt_init_context_state/trtt_init_queues_state/ (José) - pass device as the argument to init_queues_state (José) v3: - use async_submit_end (José) Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30252>
This commit is contained in:
@@ -354,8 +354,7 @@ genX(simple_shader_push_state_address)(struct anv_simple_shader *state,
|
||||
void
|
||||
genX(emit_simple_shader_end)(struct anv_simple_shader *state);
|
||||
|
||||
VkResult genX(init_trtt_context_state)(struct anv_device *device,
|
||||
struct anv_async_submit *submit);
|
||||
VkResult genX(init_trtt_context_state)(struct anv_async_submit *submit);
|
||||
|
||||
void genX(write_trtt_entries)(struct anv_async_submit *submit,
|
||||
struct anv_trtt_bind *l3l2_binds,
|
||||
|
@@ -405,9 +405,8 @@ trtt_get_page_table_bo(struct anv_device *device, struct anv_bo **bo,
|
||||
}
|
||||
|
||||
static VkResult
|
||||
anv_trtt_init_context_state(struct anv_queue *queue)
|
||||
anv_trtt_init_queues_state(struct anv_device *device)
|
||||
{
|
||||
struct anv_device *device = queue->device;
|
||||
struct anv_trtt *trtt = &device->trtt;
|
||||
|
||||
struct anv_bo *l3_bo;
|
||||
@@ -417,43 +416,52 @@ anv_trtt_init_context_state(struct anv_queue *queue)
|
||||
|
||||
trtt->l3_mirror = vk_zalloc(&device->vk.alloc, 4096, 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
if (!trtt->l3_mirror) {
|
||||
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
return result;
|
||||
}
|
||||
if (!trtt->l3_mirror)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
/* L3 has 512 entries, so we can have up to 512 L2 tables. */
|
||||
trtt->l2_mirror = vk_zalloc(&device->vk.alloc, 512 * 4096, 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
if (!trtt->l2_mirror) {
|
||||
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
goto fail_free_l3;
|
||||
vk_free(&device->vk.alloc, trtt->l3_mirror);
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
|
||||
struct anv_async_submit submits[device->queue_count];
|
||||
int submits_used = 0;
|
||||
for (uint32_t i = 0; i < device->queue_count; i++) {
|
||||
struct anv_queue *q = &device->queues[i];
|
||||
|
||||
struct anv_async_submit submit;
|
||||
result = anv_async_submit_init(&submit, queue, &device->batch_bo_pool,
|
||||
false, true);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
result = anv_async_submit_init(&submits[submits_used], q,
|
||||
&device->batch_bo_pool, false, true);
|
||||
if (result != VK_SUCCESS)
|
||||
break;
|
||||
|
||||
result = anv_genX(device->info, init_trtt_context_state)(device, &submit);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_fini_submit;
|
||||
struct anv_async_submit *submit = &submits[submits_used++];
|
||||
|
||||
anv_genX(device->info, async_submit_end)(&submit);
|
||||
result = anv_genX(device->info, init_trtt_context_state)(submit);
|
||||
if (result != VK_SUCCESS) {
|
||||
anv_async_submit_fini(submit);
|
||||
submits_used--;
|
||||
break;
|
||||
}
|
||||
|
||||
result = device->kmd_backend->queue_exec_async(&submit, 0, NULL, 1,
|
||||
&submit.signal);
|
||||
anv_genX(device->info, async_submit_end)(submit);
|
||||
|
||||
anv_async_submit_wait(&submit);
|
||||
result = device->kmd_backend->queue_exec_async(submit, 0, NULL, 1,
|
||||
&submit->signal);
|
||||
if (result != VK_SUCCESS) {
|
||||
anv_async_submit_fini(submit);
|
||||
submits_used--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fail_fini_submit:
|
||||
anv_async_submit_fini(&submit);
|
||||
return result;
|
||||
for (uint32_t i = 0; i < submits_used; i++) {
|
||||
anv_async_submit_wait(&submits[i]);
|
||||
anv_async_submit_fini(&submits[i]);
|
||||
}
|
||||
|
||||
fail_free_l3:
|
||||
vk_free(&device->vk.alloc, trtt->l3_mirror);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -645,7 +653,7 @@ anv_sparse_bind_trtt(struct anv_device *device,
|
||||
* submission.
|
||||
*/
|
||||
if (!trtt->l3_addr) {
|
||||
result = anv_trtt_init_context_state(sparse_submit->queue);
|
||||
result = anv_trtt_init_queues_state(device);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_add_bind;
|
||||
}
|
||||
|
@@ -1442,10 +1442,11 @@ genX(apply_task_urb_workaround)(struct anv_cmd_buffer *cmd_buffer)
|
||||
}
|
||||
|
||||
VkResult
|
||||
genX(init_trtt_context_state)(struct anv_device *device,
|
||||
struct anv_async_submit *submit)
|
||||
genX(init_trtt_context_state)(struct anv_async_submit *submit)
|
||||
{
|
||||
#if GFX_VER >= 12
|
||||
struct anv_queue *queue = submit->queue;
|
||||
struct anv_device *device = queue->device;
|
||||
struct anv_trtt *trtt = &device->trtt;
|
||||
struct anv_batch *batch = &submit->batch;
|
||||
|
||||
@@ -1462,25 +1463,61 @@ genX(init_trtt_context_state)(struct anv_device *device,
|
||||
anv_batch_write_reg(batch, GENX(GFX_TRTT_L3_BASE_HIGH), trtt_base_high)
|
||||
trtt_base_high.TRVAL3PointerUpperAddress = l3_addr_high;
|
||||
|
||||
anv_batch_write_reg(batch, GENX(BLT_TRTT_INVAL), trtt_inval)
|
||||
trtt_inval.InvalidTileDetectionValue = ANV_TRTT_L1_INVALID_TILE_VAL;
|
||||
anv_batch_write_reg(batch, GENX(BLT_TRTT_NULL), trtt_null)
|
||||
trtt_null.NullTileDetectionValue = ANV_TRTT_L1_NULL_TILE_VAL;
|
||||
anv_batch_write_reg(batch, GENX(BLT_TRTT_L3_BASE_LOW), trtt_base_low)
|
||||
trtt_base_low.TRVAL3PointerLowerAddress = l3_addr_low;
|
||||
anv_batch_write_reg(batch, GENX(BLT_TRTT_L3_BASE_HIGH), trtt_base_high)
|
||||
trtt_base_high.TRVAL3PointerUpperAddress = l3_addr_high;
|
||||
|
||||
anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_INVAL), trtt_inval)
|
||||
trtt_inval.InvalidTileDetectionValue = ANV_TRTT_L1_INVALID_TILE_VAL;
|
||||
anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_NULL), trtt_null)
|
||||
trtt_null.NullTileDetectionValue = ANV_TRTT_L1_NULL_TILE_VAL;
|
||||
anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_L3_BASE_LOW), trtt_base_low)
|
||||
trtt_base_low.TRVAL3PointerLowerAddress = l3_addr_low;
|
||||
anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_L3_BASE_HIGH), trtt_base_high)
|
||||
trtt_base_high.TRVAL3PointerUpperAddress = l3_addr_high;
|
||||
|
||||
#if GFX_VER >= 20
|
||||
uint32_t trva_base = device->physical->va.trtt.addr >> 44;
|
||||
anv_batch_write_reg(batch, GENX(GFX_TRTT_VA_RANGE), trtt_va_range)
|
||||
trtt_va_range.TRVABase = trva_base;
|
||||
anv_batch_write_reg(batch, GENX(BLT_TRTT_VA_RANGE), trtt_va_range)
|
||||
trtt_va_range.TRVABase = trva_base;
|
||||
anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_VA_RANGE), trtt_va_range)
|
||||
trtt_va_range.TRVABase = trva_base;
|
||||
#else
|
||||
anv_batch_write_reg(batch, GENX(GFX_TRTT_VA_RANGE), trtt_va_range) {
|
||||
trtt_va_range.TRVAMaskValue = 0xF;
|
||||
trtt_va_range.TRVADataValue = 0xF;
|
||||
}
|
||||
anv_batch_write_reg(batch, GENX(BLT_TRTT_VA_RANGE), trtt_va_range) {
|
||||
trtt_va_range.TRVAMaskValue = 0xF;
|
||||
trtt_va_range.TRVADataValue = 0xF;
|
||||
}
|
||||
anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_VA_RANGE), trtt_va_range) {
|
||||
trtt_va_range.TRVAMaskValue = 0xF;
|
||||
trtt_va_range.TRVADataValue = 0xF;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Enabling TR-TT needs to be done after setting up the other registers.
|
||||
*/
|
||||
anv_batch_write_reg(batch, GENX(GFX_TRTT_CR), trtt_cr)
|
||||
trtt_cr.TRTTEnable = true;
|
||||
anv_batch_write_reg(batch, GENX(BLT_TRTT_CR), trtt_cr)
|
||||
trtt_cr.TRTTEnable = true;
|
||||
anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_CR), trtt_cr)
|
||||
trtt_cr.TRTTEnable = true;
|
||||
|
||||
genx_batch_emit_pipe_control(batch, device->info, _3D,
|
||||
ANV_PIPE_CS_STALL_BIT |
|
||||
ANV_PIPE_TLB_INVALIDATE_BIT);
|
||||
if (queue->family->engine_class != INTEL_ENGINE_CLASS_COPY) {
|
||||
genx_batch_emit_pipe_control(batch, device->info, _3D,
|
||||
ANV_PIPE_CS_STALL_BIT |
|
||||
ANV_PIPE_TLB_INVALIDATE_BIT);
|
||||
}
|
||||
#endif
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
Reference in New Issue
Block a user