amd: fix parsing the last dword of DMA_DATA packets

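The IB parser was decoding the last dword of the packet as SQ_WAVE_GPR_ALLOC
instead of COMMAND, because the COMMAND definition conflicted with that
register at offset 0x414.
Change the COMMAND offset to an odd number (0x415) to work around it.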

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9795>
This commit is contained in:
Marek Olšák
2021-03-21 13:00:51 -04:00
committed by Marge Bot
parent 95940459be
commit b3e7c77f13
6 changed files with 25 additions and 24 deletions

View File

@@ -394,7 +394,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_415_COMMAND, ac_ib_get(ib), ~0);
break;
case PKT3_DMA_DATA:
ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
@@ -402,7 +402,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_415_COMMAND, ac_ib_get(ib), ~0);
break;
case PKT3_INDIRECT_BUFFER_SI:
case PKT3_INDIRECT_BUFFER_CONST:

View File

@@ -122,14 +122,15 @@
},
"register_mappings": [
{
"comment": "This is at offset 0x415 instead of 0x414 due to a conflict with SQ_WAVE_GPR_ALLOC",
"chips": ["gfx6", "gfx7", "gfx8", "gfx81"],
"map": {"at": 1044, "to": "pkt3"},
"map": {"at": 1045, "to": "pkt3"},
"name": "COMMAND",
"type_ref": "COMMAND"
},
{
"chips": ["gfx9", "gfx10", "gfx103"],
"map": {"at": 1044, "to": "pkt3"},
"map": {"at": 1045, "to": "pkt3"},
"name": "COMMAND",
"type_ref": "COMMAND_gfx9"
},

View File

@@ -7128,8 +7128,8 @@ gfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cs, va >> 32);
radeon_emit(cs, 4 * i); /* destination in GDS */
radeon_emit(cs, 0);
radeon_emit(cs, S_414_BYTE_COUNT_GFX9(4) |
S_414_DISABLE_WR_CONFIRM_GFX9(i != last_target));
radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) |
S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
}
radv_set_streamout_enable(cmd_buffer, true);

View File

@@ -1647,8 +1647,8 @@ si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer,
static inline unsigned cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer)
{
unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 ?
S_414_BYTE_COUNT_GFX9(~0u) :
S_414_BYTE_COUNT_GFX6(~0u);
S_415_BYTE_COUNT_GFX9(~0u) :
S_415_BYTE_COUNT_GFX6(~0u);
/* make it aligned for optimal performance */
return max & ~(SI_CPDMA_ALIGNMENT - 1);
@@ -1669,22 +1669,22 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
command |= S_414_BYTE_COUNT_GFX9(size);
command |= S_415_BYTE_COUNT_GFX9(size);
else
command |= S_414_BYTE_COUNT_GFX6(size);
command |= S_415_BYTE_COUNT_GFX6(size);
/* Sync flags. */
if (flags & CP_DMA_SYNC)
header |= S_411_CP_SYNC(1);
else {
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
else
command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
}
if (flags & CP_DMA_RAW_WAIT)
command |= S_414_RAW_WAIT(1);
command |= S_415_RAW_WAIT(1);
/* Src and dst flags. */
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&

View File

@@ -43,7 +43,7 @@
static inline unsigned cp_dma_max_byte_count(struct si_context *sctx)
{
unsigned max =
sctx->chip_class >= GFX9 ? S_414_BYTE_COUNT_GFX9(~0u) : S_414_BYTE_COUNT_GFX6(~0u);
sctx->chip_class >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u) : S_415_BYTE_COUNT_GFX6(~0u);
/* make it aligned for optimal performance */
return max & ~(SI_CPDMA_ALIGNMENT - 1);
@@ -63,16 +63,16 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
assert(sctx->chip_class != GFX6 || cache_policy == L2_BYPASS);
if (sctx->chip_class >= GFX9)
command |= S_414_BYTE_COUNT_GFX9(size);
command |= S_415_BYTE_COUNT_GFX9(size);
else
command |= S_414_BYTE_COUNT_GFX6(size);
command |= S_415_BYTE_COUNT_GFX6(size);
/* Sync flags. */
if (flags & CP_DMA_SYNC)
header |= S_411_CP_SYNC(1);
if (flags & CP_DMA_RAW_WAIT)
command |= S_414_RAW_WAIT(1);
command |= S_415_RAW_WAIT(1);
/* Src and dst flags. */
if (sctx->chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va) {
@@ -80,7 +80,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
} else if (flags & CP_DMA_DST_IS_GDS) {
header |= S_411_DST_SEL(V_411_GDS);
/* GDS increments the address, not CP. */
command |= S_414_DAS(V_414_REGISTER) | S_414_DAIC(V_414_NO_INCREMENT);
command |= S_415_DAS(V_415_REGISTER) | S_415_DAIC(V_415_NO_INCREMENT);
} else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) {
header |=
S_411_DST_SEL(V_411_DST_ADDR_TC_L2) | S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM);
@@ -91,7 +91,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
} else if (flags & CP_DMA_SRC_IS_GDS) {
header |= S_411_SRC_SEL(V_411_GDS);
/* Both of these are required for GDS. It does increment the address. */
command |= S_414_SAS(V_414_REGISTER) | S_414_SAIC(V_414_NO_INCREMENT);
command |= S_415_SAS(V_415_REGISTER) | S_415_SAIC(V_415_NO_INCREMENT);
} else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) {
header |=
S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) | S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
@@ -408,16 +408,16 @@ void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,
*/
assert(size % SI_CPDMA_ALIGNMENT == 0);
assert(address % SI_CPDMA_ALIGNMENT == 0);
assert(size < S_414_BYTE_COUNT_GFX6(~0u));
assert(size < S_415_BYTE_COUNT_GFX6(~0u));
uint32_t header = S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
uint32_t command = S_414_BYTE_COUNT_GFX6(size);
uint32_t command = S_415_BYTE_COUNT_GFX6(size);
if (sctx->chip_class >= GFX9) {
command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
header |= S_411_DST_SEL(V_411_NOWHERE);
} else {
command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
}

View File

@@ -247,7 +247,7 @@ static void gfx10_emit_streamout_begin(struct si_context *sctx)
radeon_emit(cs, va >> 32);
radeon_emit(cs, 4 * i); /* destination in GDS */
radeon_emit(cs, 0);
radeon_emit(cs, S_414_BYTE_COUNT_GFX9(4) | S_414_DISABLE_WR_CONFIRM_GFX9(i != last_target));
radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
}
radeon_end();