radv: Move radv_cp_wait_mem to radv_cs.h and add queue family argument.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25770>
This commit is contained in:
@@ -656,8 +656,8 @@ radv_wait_gang_leader(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
radeon_check_space(cmd_buffer->device->ws, ace_cs, 7);
|
radeon_check_space(cmd_buffer->device->ws, ace_cs, 7);
|
||||||
|
|
||||||
/* ACE waits for the semaphore which GFX wrote. */
|
/* ACE waits for the semaphore which GFX wrote. */
|
||||||
radv_cp_wait_mem(ace_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, cmd_buffer->gang.sem.va, cmd_buffer->gang.sem.leader_value,
|
radv_cp_wait_mem(ace_cs, RADV_QUEUE_COMPUTE, WAIT_REG_MEM_GREATER_OR_EQUAL, cmd_buffer->gang.sem.va,
|
||||||
0xffffffff);
|
cmd_buffer->gang.sem.leader_value, 0xffffffff);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct radeon_cmdbuf *
|
static struct radeon_cmdbuf *
|
||||||
@@ -10688,7 +10688,7 @@ radv_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const Vk
|
|||||||
|
|
||||||
ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7);
|
ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7);
|
||||||
|
|
||||||
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff);
|
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff);
|
||||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -208,4 +208,20 @@ radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigne
|
|||||||
radeon_emit(cs, 0); /* unused */
|
radeon_emit(cs, 0); /* unused */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ALWAYS_INLINE static void
|
||||||
|
radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, const uint32_t op, const uint64_t va,
|
||||||
|
const uint32_t ref, const uint32_t mask)
|
||||||
|
{
|
||||||
|
assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || op == WAIT_REG_MEM_GREATER_OR_EQUAL);
|
||||||
|
assert(qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE);
|
||||||
|
|
||||||
|
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
|
||||||
|
radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1));
|
||||||
|
radeon_emit(cs, va);
|
||||||
|
radeon_emit(cs, va >> 32);
|
||||||
|
radeon_emit(cs, ref); /* reference value */
|
||||||
|
radeon_emit(cs, mask); /* mask */
|
||||||
|
radeon_emit(cs, 4); /* poll interval */
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* RADV_CS_H */
|
#endif /* RADV_CS_H */
|
||||||
|
@@ -713,7 +713,7 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
|
|||||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||||
radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
|
radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
|
||||||
EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, cmd_buffer->gfx9_fence_va);
|
EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, cmd_buffer->gfx9_fence_va);
|
||||||
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff);
|
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff);
|
||||||
|
|
||||||
radv_pc_wait_idle(cmd_buffer);
|
radv_pc_wait_idle(cmd_buffer);
|
||||||
radv_pc_stop_and_sample(cmd_buffer, pool, va, true);
|
radv_pc_stop_and_sample(cmd_buffer, pool, va, true);
|
||||||
|
@@ -1996,7 +1996,6 @@ struct radv_vgt_shader_key {
|
|||||||
uint8_t vs_wave32 : 1;
|
uint8_t vs_wave32 : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask);
|
|
||||||
void si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
void si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||||
uint32_t *flush_cnt, uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
|
uint32_t *flush_cnt, uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
|
||||||
enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
|
enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
|
||||||
|
@@ -1486,7 +1486,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
|
|||||||
uint64_t avail_va = va + pool->availability_offset + 4 * query;
|
uint64_t avail_va = va + pool->availability_offset + 4 * query;
|
||||||
|
|
||||||
/* This waits on the ME. All copies below are done on the ME */
|
/* This waits on the ME. All copies below are done on the ME */
|
||||||
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);
|
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, pool->bo,
|
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, pool->bo,
|
||||||
@@ -1509,7 +1509,8 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
|
|||||||
/* Wait on the high 32 bits of the timestamp in
|
/* Wait on the high 32 bits of the timestamp in
|
||||||
* case the low part is 0xffffffff.
|
* case the low part is 0xffffffff.
|
||||||
*/
|
*/
|
||||||
radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL, local_src_va + 4, TIMESTAMP_NOT_READY >> 32, 0xffffffff);
|
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_NOT_EQUAL, local_src_va + 4, TIMESTAMP_NOT_READY >> 32,
|
||||||
|
0xffffffff);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1527,7 +1528,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
|
|||||||
|
|
||||||
/* Wait on the upper word of all results. */
|
/* Wait on the upper word of all results. */
|
||||||
for (unsigned j = 0; j < 4; j++, src_va += 8) {
|
for (unsigned j = 0; j < 4; j++, src_va += 8) {
|
||||||
radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
|
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1545,8 +1546,8 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
|
|||||||
radeon_check_space(cmd_buffer->device->ws, cs, 7 * 2);
|
radeon_check_space(cmd_buffer->device->ws, cs, 7 * 2);
|
||||||
|
|
||||||
/* Wait on the upper word of the PrimitiveStorageNeeded result. */
|
/* Wait on the upper word of the PrimitiveStorageNeeded result. */
|
||||||
radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
|
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
|
||||||
radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 20, 0x80000000, 0xffffffff);
|
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 20, 0x80000000, 0xffffffff);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1285,7 +1285,7 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
|
|||||||
* in a multi-process environment, because task shader dispatches are not
|
* in a multi-process environment, because task shader dispatches are not
|
||||||
* meant to be executed on multiple compute engines at the same time.
|
* meant to be executed on multiple compute engines at the same time.
|
||||||
*/
|
*/
|
||||||
radv_cp_wait_mem(ace_pre_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, ace_wait_va, 1, 0xffffffff);
|
radv_cp_wait_mem(ace_pre_cs, RADV_QUEUE_COMPUTE, WAIT_REG_MEM_GREATER_OR_EQUAL, ace_wait_va, 1, 0xffffffff);
|
||||||
radeon_emit(ace_pre_cs, PKT3(PKT3_WRITE_DATA, 3, 0));
|
radeon_emit(ace_pre_cs, PKT3(PKT3_WRITE_DATA, 3, 0));
|
||||||
radeon_emit(ace_pre_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
|
radeon_emit(ace_pre_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
|
||||||
radeon_emit(ace_pre_cs, ace_wait_va);
|
radeon_emit(ace_pre_cs, ace_wait_va);
|
||||||
@@ -1303,7 +1303,7 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
|
|||||||
* as soon as the gang leader is done, which may lead to bugs because the
|
* as soon as the gang leader is done, which may lead to bugs because the
|
||||||
* same command buffers could be submitted again while still being executed.
|
* same command buffers could be submitted again while still being executed.
|
||||||
*/
|
*/
|
||||||
radv_cp_wait_mem(leader_post_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, leader_wait_va, 1, 0xffffffff);
|
radv_cp_wait_mem(leader_post_cs, queue->state.qf, WAIT_REG_MEM_GREATER_OR_EQUAL, leader_wait_va, 1, 0xffffffff);
|
||||||
radeon_emit(leader_post_cs, PKT3(PKT3_WRITE_DATA, 3, 0));
|
radeon_emit(leader_post_cs, PKT3(PKT3_WRITE_DATA, 3, 0));
|
||||||
radeon_emit(leader_post_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
|
radeon_emit(leader_post_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
|
||||||
radeon_emit(leader_post_cs, leader_wait_va);
|
radeon_emit(leader_post_cs, leader_wait_va);
|
||||||
|
@@ -1029,20 +1029,6 @@ si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_leve
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask)
|
|
||||||
{
|
|
||||||
assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || op == WAIT_REG_MEM_GREATER_OR_EQUAL);
|
|
||||||
|
|
||||||
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
|
|
||||||
radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1));
|
|
||||||
radeon_emit(cs, va);
|
|
||||||
radeon_emit(cs, va >> 32);
|
|
||||||
radeon_emit(cs, ref); /* reference value */
|
|
||||||
radeon_emit(cs, mask); /* mask */
|
|
||||||
radeon_emit(cs, 4); /* poll interval */
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
si_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsigned cp_coher_cntl)
|
si_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsigned cp_coher_cntl)
|
||||||
{
|
{
|
||||||
@@ -1245,7 +1231,8 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level
|
|||||||
S_490_SEQ(gcr_seq),
|
S_490_SEQ(gcr_seq),
|
||||||
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va);
|
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va);
|
||||||
|
|
||||||
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
|
const enum radv_queue_family qf = is_mec ? RADV_QUEUE_COMPUTE : RADV_QUEUE_GENERAL;
|
||||||
|
radv_cp_wait_mem(cs, qf, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1407,7 +1394,8 @@ si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum
|
|||||||
|
|
||||||
si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event, tc_flags, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT,
|
si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event, tc_flags, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT,
|
||||||
flush_va, *flush_cnt, gfx9_eop_bug_va);
|
flush_va, *flush_cnt, gfx9_eop_bug_va);
|
||||||
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
|
const enum radv_queue_family qf = is_mec ? RADV_QUEUE_COMPUTE : RADV_QUEUE_GENERAL;
|
||||||
|
radv_cp_wait_mem(cs, qf, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* VGT state sync */
|
/* VGT state sync */
|
||||||
|
Reference in New Issue
Block a user