radv: Flush in the initial preamble CS.
Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
@@ -792,8 +792,10 @@ radv_queue_finish(struct radv_queue *queue)
|
|||||||
if (queue->hw_ctx)
|
if (queue->hw_ctx)
|
||||||
queue->device->ws->ctx_destroy(queue->hw_ctx);
|
queue->device->ws->ctx_destroy(queue->hw_ctx);
|
||||||
|
|
||||||
if (queue->preamble_cs)
|
if (queue->initial_preamble_cs)
|
||||||
queue->device->ws->cs_destroy(queue->preamble_cs);
|
queue->device->ws->cs_destroy(queue->initial_preamble_cs);
|
||||||
|
if (queue->continue_preamble_cs)
|
||||||
|
queue->device->ws->cs_destroy(queue->continue_preamble_cs);
|
||||||
if (queue->descriptor_bo)
|
if (queue->descriptor_bo)
|
||||||
queue->device->ws->buffer_destroy(queue->descriptor_bo);
|
queue->device->ws->buffer_destroy(queue->descriptor_bo);
|
||||||
if (queue->scratch_bo)
|
if (queue->scratch_bo)
|
||||||
@@ -939,6 +941,21 @@ VkResult radv_CreateDevice(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
device->ws->cs_finalize(device->empty_cs[family]);
|
device->ws->cs_finalize(device->empty_cs[family]);
|
||||||
|
|
||||||
|
device->flush_cs[family] = device->ws->cs_create(device->ws, family);
|
||||||
|
switch (family) {
|
||||||
|
case RADV_QUEUE_GENERAL:
|
||||||
|
case RADV_QUEUE_COMPUTE:
|
||||||
|
si_cs_emit_cache_flush(device->flush_cs[family],
|
||||||
|
device->physical_device->rad_info.chip_class,
|
||||||
|
family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
|
||||||
|
RADV_CMD_FLAG_INV_ICACHE |
|
||||||
|
RADV_CMD_FLAG_INV_SMEM_L1 |
|
||||||
|
RADV_CMD_FLAG_INV_VMEM_L1 |
|
||||||
|
RADV_CMD_FLAG_INV_GLOBAL_L2);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
device->ws->cs_finalize(device->flush_cs[family]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (getenv("RADV_TRACE_FILE")) {
|
if (getenv("RADV_TRACE_FILE")) {
|
||||||
@@ -995,6 +1012,8 @@ void radv_DestroyDevice(
|
|||||||
vk_free(&device->alloc, device->queues[i]);
|
vk_free(&device->alloc, device->queues[i]);
|
||||||
if (device->empty_cs[i])
|
if (device->empty_cs[i])
|
||||||
device->ws->cs_destroy(device->empty_cs[i]);
|
device->ws->cs_destroy(device->empty_cs[i]);
|
||||||
|
if (device->flush_cs[i])
|
||||||
|
device->ws->cs_destroy(device->flush_cs[i]);
|
||||||
}
|
}
|
||||||
radv_device_finish_meta(device);
|
radv_device_finish_meta(device);
|
||||||
|
|
||||||
@@ -1192,25 +1211,25 @@ radv_get_preamble_cs(struct radv_queue *queue,
|
|||||||
uint32_t compute_scratch_size,
|
uint32_t compute_scratch_size,
|
||||||
uint32_t esgs_ring_size,
|
uint32_t esgs_ring_size,
|
||||||
uint32_t gsvs_ring_size,
|
uint32_t gsvs_ring_size,
|
||||||
struct radeon_winsys_cs **preamble_cs)
|
struct radeon_winsys_cs **initial_preamble_cs,
|
||||||
|
struct radeon_winsys_cs **continue_preamble_cs)
|
||||||
{
|
{
|
||||||
struct radeon_winsys_bo *scratch_bo = NULL;
|
struct radeon_winsys_bo *scratch_bo = NULL;
|
||||||
struct radeon_winsys_bo *descriptor_bo = NULL;
|
struct radeon_winsys_bo *descriptor_bo = NULL;
|
||||||
struct radeon_winsys_bo *compute_scratch_bo = NULL;
|
struct radeon_winsys_bo *compute_scratch_bo = NULL;
|
||||||
struct radeon_winsys_bo *esgs_ring_bo = NULL;
|
struct radeon_winsys_bo *esgs_ring_bo = NULL;
|
||||||
struct radeon_winsys_bo *gsvs_ring_bo = NULL;
|
struct radeon_winsys_bo *gsvs_ring_bo = NULL;
|
||||||
struct radeon_winsys_cs *cs = NULL;
|
struct radeon_winsys_cs *dest_cs[2] = {0};
|
||||||
|
|
||||||
if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) {
|
|
||||||
*preamble_cs = NULL;
|
|
||||||
return VK_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (scratch_size <= queue->scratch_size &&
|
if (scratch_size <= queue->scratch_size &&
|
||||||
compute_scratch_size <= queue->compute_scratch_size &&
|
compute_scratch_size <= queue->compute_scratch_size &&
|
||||||
esgs_ring_size <= queue->esgs_ring_size &&
|
esgs_ring_size <= queue->esgs_ring_size &&
|
||||||
gsvs_ring_size <= queue->gsvs_ring_size) {
|
gsvs_ring_size <= queue->gsvs_ring_size &&
|
||||||
*preamble_cs = queue->preamble_cs;
|
queue->initial_preamble_cs) {
|
||||||
|
*initial_preamble_cs = queue->initial_preamble_cs;
|
||||||
|
*continue_preamble_cs = queue->continue_preamble_cs;
|
||||||
|
if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
|
||||||
|
*continue_preamble_cs = NULL;
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1282,11 +1301,14 @@ radv_get_preamble_cs(struct radv_queue *queue,
|
|||||||
} else
|
} else
|
||||||
descriptor_bo = queue->descriptor_bo;
|
descriptor_bo = queue->descriptor_bo;
|
||||||
|
|
||||||
|
for(int i = 0; i < 2; ++i) {
|
||||||
|
struct radeon_winsys_cs *cs = NULL;
|
||||||
cs = queue->device->ws->cs_create(queue->device->ws,
|
cs = queue->device->ws->cs_create(queue->device->ws,
|
||||||
queue->queue_family_index ? RING_COMPUTE : RING_GFX);
|
queue->queue_family_index ? RING_COMPUTE : RING_GFX);
|
||||||
if (!cs)
|
if (!cs)
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
|
dest_cs[i] = cs;
|
||||||
|
|
||||||
if (scratch_bo)
|
if (scratch_bo)
|
||||||
queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
|
queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
|
||||||
@@ -1363,13 +1385,29 @@ radv_get_preamble_cs(struct radv_queue *queue,
|
|||||||
radeon_emit(cs, rsrc1);
|
radeon_emit(cs, rsrc1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!i) {
|
||||||
|
si_cs_emit_cache_flush(cs,
|
||||||
|
queue->device->physical_device->rad_info.chip_class,
|
||||||
|
queue->queue_family_index == RING_COMPUTE &&
|
||||||
|
queue->device->physical_device->rad_info.chip_class >= CIK,
|
||||||
|
RADV_CMD_FLAG_INV_ICACHE |
|
||||||
|
RADV_CMD_FLAG_INV_SMEM_L1 |
|
||||||
|
RADV_CMD_FLAG_INV_VMEM_L1 |
|
||||||
|
RADV_CMD_FLAG_INV_GLOBAL_L2);
|
||||||
|
}
|
||||||
|
|
||||||
if (!queue->device->ws->cs_finalize(cs))
|
if (!queue->device->ws->cs_finalize(cs))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
if (queue->preamble_cs)
|
if (queue->initial_preamble_cs)
|
||||||
queue->device->ws->cs_destroy(queue->preamble_cs);
|
queue->device->ws->cs_destroy(queue->initial_preamble_cs);
|
||||||
|
|
||||||
queue->preamble_cs = cs;
|
if (queue->continue_preamble_cs)
|
||||||
|
queue->device->ws->cs_destroy(queue->continue_preamble_cs);
|
||||||
|
|
||||||
|
queue->initial_preamble_cs = dest_cs[0];
|
||||||
|
queue->continue_preamble_cs = dest_cs[1];
|
||||||
|
|
||||||
if (scratch_bo != queue->scratch_bo) {
|
if (scratch_bo != queue->scratch_bo) {
|
||||||
if (queue->scratch_bo)
|
if (queue->scratch_bo)
|
||||||
@@ -1406,11 +1444,15 @@ radv_get_preamble_cs(struct radv_queue *queue,
|
|||||||
queue->descriptor_bo = descriptor_bo;
|
queue->descriptor_bo = descriptor_bo;
|
||||||
}
|
}
|
||||||
|
|
||||||
*preamble_cs = cs;
|
*initial_preamble_cs = queue->initial_preamble_cs;
|
||||||
|
*continue_preamble_cs = queue->continue_preamble_cs;
|
||||||
|
if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
|
||||||
|
*continue_preamble_cs = NULL;
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
fail:
|
fail:
|
||||||
if (cs)
|
for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
|
||||||
queue->device->ws->cs_destroy(cs);
|
if (dest_cs[i])
|
||||||
|
queue->device->ws->cs_destroy(dest_cs[i]);
|
||||||
if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
|
if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
|
||||||
queue->device->ws->buffer_destroy(descriptor_bo);
|
queue->device->ws->buffer_destroy(descriptor_bo);
|
||||||
if (scratch_bo && scratch_bo != queue->scratch_bo)
|
if (scratch_bo && scratch_bo != queue->scratch_bo)
|
||||||
@@ -1439,7 +1481,7 @@ VkResult radv_QueueSubmit(
|
|||||||
uint32_t scratch_size = 0;
|
uint32_t scratch_size = 0;
|
||||||
uint32_t compute_scratch_size = 0;
|
uint32_t compute_scratch_size = 0;
|
||||||
uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
|
uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
|
||||||
struct radeon_winsys_cs *preamble_cs = NULL;
|
struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
|
||||||
VkResult result;
|
VkResult result;
|
||||||
bool fence_emitted = false;
|
bool fence_emitted = false;
|
||||||
|
|
||||||
@@ -1458,13 +1500,16 @@ VkResult radv_QueueSubmit(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, esgs_ring_size, gsvs_ring_size, &preamble_cs);
|
result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
|
||||||
|
esgs_ring_size, gsvs_ring_size,
|
||||||
|
&initial_preamble_cs, &continue_preamble_cs);
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < submitCount; i++) {
|
for (uint32_t i = 0; i < submitCount; i++) {
|
||||||
struct radeon_winsys_cs **cs_array;
|
struct radeon_winsys_cs **cs_array;
|
||||||
bool can_patch = true;
|
bool has_flush = !submitCount;
|
||||||
|
bool can_patch = !has_flush;
|
||||||
uint32_t advance;
|
uint32_t advance;
|
||||||
|
|
||||||
if (!pSubmits[i].commandBufferCount) {
|
if (!pSubmits[i].commandBufferCount) {
|
||||||
@@ -1487,29 +1532,32 @@ VkResult radv_QueueSubmit(
|
|||||||
}
|
}
|
||||||
|
|
||||||
cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
|
cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
|
||||||
pSubmits[i].commandBufferCount);
|
(pSubmits[i].commandBufferCount + has_flush));
|
||||||
|
|
||||||
|
if(has_flush)
|
||||||
|
cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
|
||||||
|
|
||||||
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
|
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
|
||||||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
|
||||||
pSubmits[i].pCommandBuffers[j]);
|
pSubmits[i].pCommandBuffers[j]);
|
||||||
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
|
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
|
||||||
|
|
||||||
cs_array[j] = cmd_buffer->cs;
|
cs_array[j + has_flush] = cmd_buffer->cs;
|
||||||
if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
|
if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
|
||||||
can_patch = false;
|
can_patch = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
|
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + has_flush; j += advance) {
|
||||||
advance = MIN2(max_cs_submission,
|
advance = MIN2(max_cs_submission,
|
||||||
pSubmits[i].commandBufferCount - j);
|
pSubmits[i].commandBufferCount + has_flush - j);
|
||||||
bool b = j == 0;
|
bool b = j == 0;
|
||||||
bool e = j + advance == pSubmits[i].commandBufferCount;
|
bool e = j + advance == pSubmits[i].commandBufferCount + has_flush;
|
||||||
|
|
||||||
if (queue->device->trace_bo)
|
if (queue->device->trace_bo)
|
||||||
*queue->device->trace_id_ptr = 0;
|
*queue->device->trace_id_ptr = 0;
|
||||||
|
|
||||||
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
|
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
|
||||||
advance, preamble_cs, preamble_cs,
|
advance, initial_preamble_cs, continue_preamble_cs,
|
||||||
(struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
|
(struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
|
||||||
b ? pSubmits[i].waitSemaphoreCount : 0,
|
b ? pSubmits[i].waitSemaphoreCount : 0,
|
||||||
(struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
|
(struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
|
||||||
|
@@ -479,7 +479,8 @@ struct radv_queue {
|
|||||||
struct radeon_winsys_bo *compute_scratch_bo;
|
struct radeon_winsys_bo *compute_scratch_bo;
|
||||||
struct radeon_winsys_bo *esgs_ring_bo;
|
struct radeon_winsys_bo *esgs_ring_bo;
|
||||||
struct radeon_winsys_bo *gsvs_ring_bo;
|
struct radeon_winsys_bo *gsvs_ring_bo;
|
||||||
struct radeon_winsys_cs *preamble_cs;
|
struct radeon_winsys_cs *initial_preamble_cs;
|
||||||
|
struct radeon_winsys_cs *continue_preamble_cs;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct radv_device {
|
struct radv_device {
|
||||||
@@ -495,6 +496,7 @@ struct radv_device {
|
|||||||
struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
|
struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
|
||||||
int queue_count[RADV_MAX_QUEUE_FAMILIES];
|
int queue_count[RADV_MAX_QUEUE_FAMILIES];
|
||||||
struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
|
struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
|
||||||
|
struct radeon_winsys_cs *flush_cs[RADV_MAX_QUEUE_FAMILIES];
|
||||||
|
|
||||||
uint64_t debug_flags;
|
uint64_t debug_flags;
|
||||||
|
|
||||||
@@ -764,6 +766,14 @@ void si_write_scissors(struct radeon_winsys_cs *cs, int first,
|
|||||||
int count, const VkRect2D *scissors);
|
int count, const VkRect2D *scissors);
|
||||||
uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
|
uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
|
||||||
bool instanced_or_indirect_draw, uint32_t draw_vertex_count);
|
bool instanced_or_indirect_draw, uint32_t draw_vertex_count);
|
||||||
|
void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
|
||||||
|
enum chip_class chip_class,
|
||||||
|
bool is_mec,
|
||||||
|
enum radv_cmd_flush_bits flush_bits);
|
||||||
|
void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
|
||||||
|
enum chip_class chip_class,
|
||||||
|
bool is_mec,
|
||||||
|
enum radv_cmd_flush_bits flush_bits);
|
||||||
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
|
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
|
||||||
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
|
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
|
||||||
uint64_t src_va, uint64_t dest_va,
|
uint64_t src_va, uint64_t dest_va,
|
||||||
|
@@ -689,7 +689,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
void
|
||||||
si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
|
si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
|
||||||
enum chip_class chip_class,
|
enum chip_class chip_class,
|
||||||
bool is_mec,
|
bool is_mec,
|
||||||
|
Reference in New Issue
Block a user