radv: Add queue family argument to some functions.
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25770>
This commit is contained in:
@@ -587,7 +587,7 @@ radv_gang_cache_flush(struct radv_cmd_buffer *cmd_buffer)
|
||||
enum rgp_flush_bits sqtt_flush_bits = 0;
|
||||
|
||||
si_cs_emit_cache_flush(cmd_buffer->device->ws, ace_cs, cmd_buffer->device->physical_device->rad_info.gfx_level, NULL,
|
||||
0, true, flush_bits, &sqtt_flush_bits, 0);
|
||||
0, RADV_QUEUE_COMPUTE, flush_bits, &sqtt_flush_bits, 0);
|
||||
|
||||
cmd_buffer->gang.flush_bits = 0;
|
||||
}
|
||||
@@ -629,10 +629,9 @@ radv_flush_gang_leader_semaphore(struct radv_cmd_buffer *cmd_buffer)
|
||||
ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 12);
|
||||
|
||||
/* GFX writes a value to the semaphore which ACE can wait for.*/
|
||||
si_cs_emit_write_event_eop(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
|
||||
EOP_DATA_SEL_VALUE_32BIT, cmd_buffer->gang.sem.va, cmd_buffer->gang.sem.leader_value,
|
||||
cmd_buffer->gfx9_eop_bug_va);
|
||||
si_cs_emit_write_event_eop(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf,
|
||||
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT,
|
||||
cmd_buffer->gang.sem.va, cmd_buffer->gang.sem.leader_value, cmd_buffer->gfx9_eop_bug_va);
|
||||
|
||||
cmd_buffer->gang.sem.emitted_leader_value = cmd_buffer->gang.sem.leader_value;
|
||||
|
||||
@@ -711,14 +710,13 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flu
|
||||
|
||||
/* Force wait for graphics or compute engines to be idle. */
|
||||
si_cs_emit_cache_flush(device->ws, cmd_buffer->cs, device->physical_device->rad_info.gfx_level,
|
||||
&cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), flags, &sqtt_flush_bits,
|
||||
cmd_buffer->gfx9_eop_bug_va);
|
||||
&cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va, cmd_buffer->qf, flags,
|
||||
&sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va);
|
||||
|
||||
if ((flags & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) && radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
|
||||
/* Force wait for compute engines to be idle on the internal cmdbuf. */
|
||||
si_cs_emit_cache_flush(device->ws, cmd_buffer->gang.cs, device->physical_device->rad_info.gfx_level, NULL, 0,
|
||||
true, RADV_CMD_FLAG_CS_PARTIAL_FLUSH, &sqtt_flush_bits, 0);
|
||||
RADV_QUEUE_COMPUTE, RADV_CMD_FLAG_CS_PARTIAL_FLUSH, &sqtt_flush_bits, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10593,9 +10591,9 @@ write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, VkPipe
|
||||
event_type = V_028A90_BOTTOM_OF_PIPE_TS;
|
||||
}
|
||||
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), event_type, 0, EOP_DST_SEL_MEM,
|
||||
EOP_DATA_SEL_VALUE_32BIT, va, value, cmd_buffer->gfx9_eop_bug_va);
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf,
|
||||
event_type, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, value,
|
||||
cmd_buffer->gfx9_eop_bug_va);
|
||||
}
|
||||
|
||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||
@@ -11058,9 +11056,9 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou
|
||||
}
|
||||
} else {
|
||||
if (append) {
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_PS_DONE, 0, EOP_DST_SEL_TC_L2,
|
||||
EOP_DATA_SEL_GDS, va, EOP_DATA_GDS(i, 1), 0);
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf,
|
||||
V_028A90_PS_DONE, 0, EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va,
|
||||
EOP_DATA_GDS(i, 1), 0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -11174,9 +11172,9 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
} else {
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
|
||||
EOP_DATA_SEL_VALUE_32BIT, va, marker, cmd_buffer->gfx9_eop_bug_va);
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf,
|
||||
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker,
|
||||
cmd_buffer->gfx9_eop_bug_va);
|
||||
}
|
||||
|
||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||
|
@@ -710,9 +710,9 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
|
||||
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo);
|
||||
|
||||
uint64_t perf_ctr_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
|
||||
EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, cmd_buffer->gfx9_fence_va);
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf,
|
||||
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1,
|
||||
cmd_buffer->gfx9_fence_va);
|
||||
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff);
|
||||
|
||||
radv_pc_wait_idle(cmd_buffer);
|
||||
|
@@ -1979,8 +1979,8 @@ VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device,
|
||||
uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw,
|
||||
bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology,
|
||||
bool prim_restart_enable, unsigned patch_control_points, unsigned num_tess_patches);
|
||||
void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec, unsigned event,
|
||||
unsigned event_flags, unsigned dst_sel, unsigned data_sel, uint64_t va,
|
||||
void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
|
||||
unsigned event, unsigned event_flags, unsigned dst_sel, unsigned data_sel, uint64_t va,
|
||||
uint32_t new_fence, uint64_t gfx9_eop_bug_va);
|
||||
|
||||
struct radv_vgt_shader_key {
|
||||
@@ -1997,8 +1997,9 @@ struct radv_vgt_shader_key {
|
||||
};
|
||||
|
||||
void si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||
uint32_t *flush_cnt, uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
|
||||
enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
|
||||
uint32_t *flush_cnt, uint64_t flush_va, enum radv_queue_family qf,
|
||||
enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits,
|
||||
uint64_t gfx9_eop_bug_va);
|
||||
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
|
||||
void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op,
|
||||
uint64_t va);
|
||||
|
@@ -1929,9 +1929,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
}
|
||||
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
|
||||
EOP_DATA_SEL_VALUE_32BIT, avail_va, 1, cmd_buffer->gfx9_eop_bug_va);
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf,
|
||||
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1,
|
||||
cmd_buffer->gfx9_eop_bug_va);
|
||||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
@@ -2066,7 +2066,6 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
|
||||
bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
uint64_t va = radv_buffer_get_va(pool->bo);
|
||||
uint64_t query_va = va + pool->stride * query;
|
||||
@@ -2096,7 +2095,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
|
||||
radeon_emit(cs, query_va);
|
||||
radeon_emit(cs, query_va >> 32);
|
||||
} else {
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, mec,
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf,
|
||||
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_TIMESTAMP, query_va, 0,
|
||||
cmd_buffer->gfx9_eop_bug_va);
|
||||
}
|
||||
|
@@ -1056,7 +1056,6 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
|
||||
if (i < 2) {
|
||||
/* The two initial preambles have a cache flush at the beginning. */
|
||||
const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
|
||||
const bool is_mec = queue->qf == RADV_QUEUE_COMPUTE && gfx_level >= GFX7;
|
||||
enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
|
||||
RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
|
||||
RADV_CMD_FLAG_START_PIPELINE_STATS;
|
||||
@@ -1068,7 +1067,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
|
||||
flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
|
||||
}
|
||||
|
||||
si_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, is_mec, flush_bits, &sqtt_flush_bits, 0);
|
||||
si_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, queue->qf, flush_bits, &sqtt_flush_bits, 0);
|
||||
}
|
||||
|
||||
result = ws->cs_finalize(cs);
|
||||
|
@@ -59,13 +59,28 @@ gfx10_get_sqtt_ctrl(const struct radv_device *device, bool enable)
|
||||
return sqtt_ctrl;
|
||||
}
|
||||
|
||||
/* Map an AMD hardware IP (engine) type to the corresponding RADV queue
 * family: GFX -> GENERAL, COMPUTE -> COMPUTE, SDMA -> TRANSFER.
 * Any other IP type aborts via unreachable().
 */
static enum radv_queue_family
|
||||
radv_ip_to_queue_family(enum amd_ip_type t)
|
||||
{
|
||||
switch (t) {
|
||||
case AMD_IP_GFX:
|
||||
return RADV_QUEUE_GENERAL;
|
||||
case AMD_IP_COMPUTE:
|
||||
return RADV_QUEUE_COMPUTE;
|
||||
case AMD_IP_SDMA:
|
||||
return RADV_QUEUE_TRANSFER;
|
||||
default:
|
||||
unreachable("Unknown IP type");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *cs, int family)
|
||||
{
|
||||
const enum radv_queue_family qf = radv_ip_to_queue_family(family);
|
||||
enum rgp_flush_bits sqtt_flush_bits = 0;
|
||||
si_cs_emit_cache_flush(
|
||||
device->ws, cs, device->physical_device->rad_info.gfx_level, NULL, 0,
|
||||
family == AMD_IP_COMPUTE && device->physical_device->rad_info.gfx_level >= GFX7,
|
||||
device->ws, cs, device->physical_device->rad_info.gfx_level, NULL, 0, qf,
|
||||
(family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
|
||||
: (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
|
||||
RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2,
|
||||
|
@@ -946,10 +946,11 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra
|
||||
}
|
||||
|
||||
void
|
||||
si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec, unsigned event,
|
||||
unsigned event_flags, unsigned dst_sel, unsigned data_sel, uint64_t va, uint32_t new_fence,
|
||||
uint64_t gfx9_eop_bug_va)
|
||||
si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
|
||||
unsigned event, unsigned event_flags, unsigned dst_sel, unsigned data_sel, uint64_t va,
|
||||
uint32_t new_fence, uint64_t gfx9_eop_bug_va)
|
||||
{
|
||||
const bool is_mec = qf == RADV_QUEUE_COMPUTE && gfx_level >= GFX7;
|
||||
unsigned op =
|
||||
EVENT_TYPE(event) | EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) | event_flags;
|
||||
unsigned is_gfx8_mec = is_mec && gfx_level < GFX9;
|
||||
@@ -1053,9 +1054,10 @@ si_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsigne
|
||||
|
||||
static void
|
||||
gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt,
|
||||
uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
|
||||
uint64_t flush_va, enum radv_queue_family qf, enum radv_cmd_flush_bits flush_bits,
|
||||
enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va)
|
||||
{
|
||||
const bool is_mec = qf == RADV_QUEUE_COMPUTE;
|
||||
uint32_t gcr_cntl = 0;
|
||||
unsigned cb_db_event = 0;
|
||||
|
||||
@@ -1225,13 +1227,12 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level
|
||||
assert(flush_cnt);
|
||||
(*flush_cnt)++;
|
||||
|
||||
si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event,
|
||||
si_cs_emit_write_event_eop(cs, gfx_level, qf, cb_db_event,
|
||||
S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) |
|
||||
S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) |
|
||||
S_490_SEQ(gcr_seq),
|
||||
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va);
|
||||
|
||||
const enum radv_queue_family qf = is_mec ? RADV_QUEUE_COMPUTE : RADV_QUEUE_GENERAL;
|
||||
radv_cp_wait_mem(cs, qf, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
|
||||
}
|
||||
}
|
||||
@@ -1277,8 +1278,9 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level
|
||||
|
||||
void
|
||||
si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||
uint32_t *flush_cnt, uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
|
||||
enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va)
|
||||
uint32_t *flush_cnt, uint64_t flush_va, enum radv_queue_family qf,
|
||||
enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits,
|
||||
uint64_t gfx9_eop_bug_va)
|
||||
{
|
||||
unsigned cp_coher_cntl = 0;
|
||||
uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB);
|
||||
@@ -1287,11 +1289,12 @@ si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum
|
||||
|
||||
if (gfx_level >= GFX10) {
|
||||
/* GFX10 cache flush handling is quite different. */
|
||||
gfx10_cs_emit_cache_flush(cs, gfx_level, flush_cnt, flush_va, is_mec, flush_bits, sqtt_flush_bits,
|
||||
gfx9_eop_bug_va);
|
||||
gfx10_cs_emit_cache_flush(cs, gfx_level, flush_cnt, flush_va, qf, flush_bits, sqtt_flush_bits, gfx9_eop_bug_va);
|
||||
return;
|
||||
}
|
||||
|
||||
const bool is_mec = qf == RADV_QUEUE_COMPUTE && gfx_level >= GFX7;
|
||||
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) {
|
||||
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
|
||||
*sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE;
|
||||
@@ -1394,7 +1397,6 @@ si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum
|
||||
|
||||
si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event, tc_flags, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT,
|
||||
flush_va, *flush_cnt, gfx9_eop_bug_va);
|
||||
const enum radv_queue_family qf = is_mec ? RADV_QUEUE_COMPUTE : RADV_QUEUE_GENERAL;
|
||||
radv_cp_wait_mem(cs, qf, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user