amd: fix parsing the last dword of DMA_DATA packets
The IB parser was decoding the last dword of DMA_DATA packets as SQ_WAVE_GPR_ALLOC instead of COMMAND, because both were mapped at offset 0x414. Move COMMAND to the odd offset 0x415 to work around the conflict. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9795>
This commit is contained in:
@@ -394,7 +394,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
||||
ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_415_COMMAND, ac_ib_get(ib), ~0);
|
||||
break;
|
||||
case PKT3_DMA_DATA:
|
||||
ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
|
||||
@@ -402,7 +402,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
||||
ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_415_COMMAND, ac_ib_get(ib), ~0);
|
||||
break;
|
||||
case PKT3_INDIRECT_BUFFER_SI:
|
||||
case PKT3_INDIRECT_BUFFER_CONST:
|
||||
|
@@ -122,14 +122,15 @@
|
||||
},
|
||||
"register_mappings": [
|
||||
{
|
||||
"comment": "This is at offset 0x415 instead of 0x414 due to a conflict with SQ_WAVE_GPR_ALLOC",
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81"],
|
||||
"map": {"at": 1044, "to": "pkt3"},
|
||||
"map": {"at": 1045, "to": "pkt3"},
|
||||
"name": "COMMAND",
|
||||
"type_ref": "COMMAND"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx9", "gfx10", "gfx103"],
|
||||
"map": {"at": 1044, "to": "pkt3"},
|
||||
"map": {"at": 1045, "to": "pkt3"},
|
||||
"name": "COMMAND",
|
||||
"type_ref": "COMMAND_gfx9"
|
||||
},
|
||||
|
@@ -7128,8 +7128,8 @@ gfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer,
|
||||
radeon_emit(cs, va >> 32);
|
||||
radeon_emit(cs, 4 * i); /* destination in GDS */
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, S_414_BYTE_COUNT_GFX9(4) |
|
||||
S_414_DISABLE_WR_CONFIRM_GFX9(i != last_target));
|
||||
radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) |
|
||||
S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
|
||||
}
|
||||
|
||||
radv_set_streamout_enable(cmd_buffer, true);
|
||||
|
@@ -1647,8 +1647,8 @@ si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer,
|
||||
static inline unsigned cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 ?
|
||||
S_414_BYTE_COUNT_GFX9(~0u) :
|
||||
S_414_BYTE_COUNT_GFX6(~0u);
|
||||
S_415_BYTE_COUNT_GFX9(~0u) :
|
||||
S_415_BYTE_COUNT_GFX6(~0u);
|
||||
|
||||
/* make it aligned for optimal performance */
|
||||
return max & ~(SI_CPDMA_ALIGNMENT - 1);
|
||||
@@ -1669,22 +1669,22 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
|
||||
|
||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
|
||||
command |= S_414_BYTE_COUNT_GFX9(size);
|
||||
command |= S_415_BYTE_COUNT_GFX9(size);
|
||||
else
|
||||
command |= S_414_BYTE_COUNT_GFX6(size);
|
||||
command |= S_415_BYTE_COUNT_GFX6(size);
|
||||
|
||||
/* Sync flags. */
|
||||
if (flags & CP_DMA_SYNC)
|
||||
header |= S_411_CP_SYNC(1);
|
||||
else {
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
|
||||
command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
|
||||
command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
|
||||
else
|
||||
command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
|
||||
command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
|
||||
}
|
||||
|
||||
if (flags & CP_DMA_RAW_WAIT)
|
||||
command |= S_414_RAW_WAIT(1);
|
||||
command |= S_415_RAW_WAIT(1);
|
||||
|
||||
/* Src and dst flags. */
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
|
@@ -43,7 +43,7 @@
|
||||
static inline unsigned cp_dma_max_byte_count(struct si_context *sctx)
|
||||
{
|
||||
unsigned max =
|
||||
sctx->chip_class >= GFX9 ? S_414_BYTE_COUNT_GFX9(~0u) : S_414_BYTE_COUNT_GFX6(~0u);
|
||||
sctx->chip_class >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u) : S_415_BYTE_COUNT_GFX6(~0u);
|
||||
|
||||
/* make it aligned for optimal performance */
|
||||
return max & ~(SI_CPDMA_ALIGNMENT - 1);
|
||||
@@ -63,16 +63,16 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
|
||||
assert(sctx->chip_class != GFX6 || cache_policy == L2_BYPASS);
|
||||
|
||||
if (sctx->chip_class >= GFX9)
|
||||
command |= S_414_BYTE_COUNT_GFX9(size);
|
||||
command |= S_415_BYTE_COUNT_GFX9(size);
|
||||
else
|
||||
command |= S_414_BYTE_COUNT_GFX6(size);
|
||||
command |= S_415_BYTE_COUNT_GFX6(size);
|
||||
|
||||
/* Sync flags. */
|
||||
if (flags & CP_DMA_SYNC)
|
||||
header |= S_411_CP_SYNC(1);
|
||||
|
||||
if (flags & CP_DMA_RAW_WAIT)
|
||||
command |= S_414_RAW_WAIT(1);
|
||||
command |= S_415_RAW_WAIT(1);
|
||||
|
||||
/* Src and dst flags. */
|
||||
if (sctx->chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va) {
|
||||
@@ -80,7 +80,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
|
||||
} else if (flags & CP_DMA_DST_IS_GDS) {
|
||||
header |= S_411_DST_SEL(V_411_GDS);
|
||||
/* GDS increments the address, not CP. */
|
||||
command |= S_414_DAS(V_414_REGISTER) | S_414_DAIC(V_414_NO_INCREMENT);
|
||||
command |= S_415_DAS(V_415_REGISTER) | S_415_DAIC(V_415_NO_INCREMENT);
|
||||
} else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) {
|
||||
header |=
|
||||
S_411_DST_SEL(V_411_DST_ADDR_TC_L2) | S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM);
|
||||
@@ -91,7 +91,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
|
||||
} else if (flags & CP_DMA_SRC_IS_GDS) {
|
||||
header |= S_411_SRC_SEL(V_411_GDS);
|
||||
/* Both of these are required for GDS. It does increment the address. */
|
||||
command |= S_414_SAS(V_414_REGISTER) | S_414_SAIC(V_414_NO_INCREMENT);
|
||||
command |= S_415_SAS(V_415_REGISTER) | S_415_SAIC(V_415_NO_INCREMENT);
|
||||
} else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) {
|
||||
header |=
|
||||
S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) | S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
|
||||
@@ -408,16 +408,16 @@ void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,
|
||||
*/
|
||||
assert(size % SI_CPDMA_ALIGNMENT == 0);
|
||||
assert(address % SI_CPDMA_ALIGNMENT == 0);
|
||||
assert(size < S_414_BYTE_COUNT_GFX6(~0u));
|
||||
assert(size < S_415_BYTE_COUNT_GFX6(~0u));
|
||||
|
||||
uint32_t header = S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
|
||||
uint32_t command = S_414_BYTE_COUNT_GFX6(size);
|
||||
uint32_t command = S_415_BYTE_COUNT_GFX6(size);
|
||||
|
||||
if (sctx->chip_class >= GFX9) {
|
||||
command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
|
||||
command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
|
||||
header |= S_411_DST_SEL(V_411_NOWHERE);
|
||||
} else {
|
||||
command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
|
||||
command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
|
||||
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
|
||||
}
|
||||
|
||||
|
@@ -247,7 +247,7 @@ static void gfx10_emit_streamout_begin(struct si_context *sctx)
|
||||
radeon_emit(cs, va >> 32);
|
||||
radeon_emit(cs, 4 * i); /* destination in GDS */
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, S_414_BYTE_COUNT_GFX9(4) | S_414_DISABLE_WR_CONFIRM_GFX9(i != last_target));
|
||||
radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
|
||||
}
|
||||
radeon_end();
|
||||
|
||||
|
Reference in New Issue
Block a user