amd: fix parsing the last dword of DMA_DATA packets

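The IB parser was decoding the last dword as SQ_WAVE_GPR_ALLOC instead of
COMMAND, because COMMAND was mapped at offset 0x414, which conflicts with
SQ_WAVE_GPR_ALLOC. Change the COMMAND offset to an odd number (0x415) to
work around the conflict.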

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9795>
This commit is contained in:
Marek Olšák
2021-03-21 13:00:51 -04:00
committed by Marge Bot
parent 95940459be
commit b3e7c77f13
6 changed files with 25 additions and 24 deletions

View File

@@ -394,7 +394,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_415_COMMAND, ac_ib_get(ib), ~0);
break; break;
case PKT3_DMA_DATA: case PKT3_DMA_DATA:
ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
@@ -402,7 +402,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_415_COMMAND, ac_ib_get(ib), ~0);
break; break;
case PKT3_INDIRECT_BUFFER_SI: case PKT3_INDIRECT_BUFFER_SI:
case PKT3_INDIRECT_BUFFER_CONST: case PKT3_INDIRECT_BUFFER_CONST:

View File

@@ -122,14 +122,15 @@
}, },
"register_mappings": [ "register_mappings": [
{ {
"comment": "This is at offset 0x415 instead of 0x414 due to a conflict with SQ_WAVE_GPR_ALLOC",
"chips": ["gfx6", "gfx7", "gfx8", "gfx81"], "chips": ["gfx6", "gfx7", "gfx8", "gfx81"],
"map": {"at": 1044, "to": "pkt3"}, "map": {"at": 1045, "to": "pkt3"},
"name": "COMMAND", "name": "COMMAND",
"type_ref": "COMMAND" "type_ref": "COMMAND"
}, },
{ {
"chips": ["gfx9", "gfx10", "gfx103"], "chips": ["gfx9", "gfx10", "gfx103"],
"map": {"at": 1044, "to": "pkt3"}, "map": {"at": 1045, "to": "pkt3"},
"name": "COMMAND", "name": "COMMAND",
"type_ref": "COMMAND_gfx9" "type_ref": "COMMAND_gfx9"
}, },

View File

@@ -7128,8 +7128,8 @@ gfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cs, va >> 32); radeon_emit(cs, va >> 32);
radeon_emit(cs, 4 * i); /* destination in GDS */ radeon_emit(cs, 4 * i); /* destination in GDS */
radeon_emit(cs, 0); radeon_emit(cs, 0);
radeon_emit(cs, S_414_BYTE_COUNT_GFX9(4) | radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) |
S_414_DISABLE_WR_CONFIRM_GFX9(i != last_target)); S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
} }
radv_set_streamout_enable(cmd_buffer, true); radv_set_streamout_enable(cmd_buffer, true);

View File

@@ -1647,8 +1647,8 @@ si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer,
static inline unsigned cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer) static inline unsigned cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer)
{ {
unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 ? unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 ?
S_414_BYTE_COUNT_GFX9(~0u) : S_415_BYTE_COUNT_GFX9(~0u) :
S_414_BYTE_COUNT_GFX6(~0u); S_415_BYTE_COUNT_GFX6(~0u);
/* make it aligned for optimal performance */ /* make it aligned for optimal performance */
return max & ~(SI_CPDMA_ALIGNMENT - 1); return max & ~(SI_CPDMA_ALIGNMENT - 1);
@@ -1669,22 +1669,22 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
command |= S_414_BYTE_COUNT_GFX9(size); command |= S_415_BYTE_COUNT_GFX9(size);
else else
command |= S_414_BYTE_COUNT_GFX6(size); command |= S_415_BYTE_COUNT_GFX6(size);
/* Sync flags. */ /* Sync flags. */
if (flags & CP_DMA_SYNC) if (flags & CP_DMA_SYNC)
header |= S_411_CP_SYNC(1); header |= S_411_CP_SYNC(1);
else { else {
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
command |= S_414_DISABLE_WR_CONFIRM_GFX9(1); command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
else else
command |= S_414_DISABLE_WR_CONFIRM_GFX6(1); command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
} }
if (flags & CP_DMA_RAW_WAIT) if (flags & CP_DMA_RAW_WAIT)
command |= S_414_RAW_WAIT(1); command |= S_415_RAW_WAIT(1);
/* Src and dst flags. */ /* Src and dst flags. */
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 && if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&

View File

@@ -43,7 +43,7 @@
static inline unsigned cp_dma_max_byte_count(struct si_context *sctx) static inline unsigned cp_dma_max_byte_count(struct si_context *sctx)
{ {
unsigned max = unsigned max =
sctx->chip_class >= GFX9 ? S_414_BYTE_COUNT_GFX9(~0u) : S_414_BYTE_COUNT_GFX6(~0u); sctx->chip_class >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u) : S_415_BYTE_COUNT_GFX6(~0u);
/* make it aligned for optimal performance */ /* make it aligned for optimal performance */
return max & ~(SI_CPDMA_ALIGNMENT - 1); return max & ~(SI_CPDMA_ALIGNMENT - 1);
@@ -63,16 +63,16 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
assert(sctx->chip_class != GFX6 || cache_policy == L2_BYPASS); assert(sctx->chip_class != GFX6 || cache_policy == L2_BYPASS);
if (sctx->chip_class >= GFX9) if (sctx->chip_class >= GFX9)
command |= S_414_BYTE_COUNT_GFX9(size); command |= S_415_BYTE_COUNT_GFX9(size);
else else
command |= S_414_BYTE_COUNT_GFX6(size); command |= S_415_BYTE_COUNT_GFX6(size);
/* Sync flags. */ /* Sync flags. */
if (flags & CP_DMA_SYNC) if (flags & CP_DMA_SYNC)
header |= S_411_CP_SYNC(1); header |= S_411_CP_SYNC(1);
if (flags & CP_DMA_RAW_WAIT) if (flags & CP_DMA_RAW_WAIT)
command |= S_414_RAW_WAIT(1); command |= S_415_RAW_WAIT(1);
/* Src and dst flags. */ /* Src and dst flags. */
if (sctx->chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va) { if (sctx->chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va) {
@@ -80,7 +80,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
} else if (flags & CP_DMA_DST_IS_GDS) { } else if (flags & CP_DMA_DST_IS_GDS) {
header |= S_411_DST_SEL(V_411_GDS); header |= S_411_DST_SEL(V_411_GDS);
/* GDS increments the address, not CP. */ /* GDS increments the address, not CP. */
command |= S_414_DAS(V_414_REGISTER) | S_414_DAIC(V_414_NO_INCREMENT); command |= S_415_DAS(V_415_REGISTER) | S_415_DAIC(V_415_NO_INCREMENT);
} else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) { } else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) {
header |= header |=
S_411_DST_SEL(V_411_DST_ADDR_TC_L2) | S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM); S_411_DST_SEL(V_411_DST_ADDR_TC_L2) | S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM);
@@ -91,7 +91,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
} else if (flags & CP_DMA_SRC_IS_GDS) { } else if (flags & CP_DMA_SRC_IS_GDS) {
header |= S_411_SRC_SEL(V_411_GDS); header |= S_411_SRC_SEL(V_411_GDS);
/* Both of these are required for GDS. It does increment the address. */ /* Both of these are required for GDS. It does increment the address. */
command |= S_414_SAS(V_414_REGISTER) | S_414_SAIC(V_414_NO_INCREMENT); command |= S_415_SAS(V_415_REGISTER) | S_415_SAIC(V_415_NO_INCREMENT);
} else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) { } else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) {
header |= header |=
S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) | S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM); S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) | S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
@@ -408,16 +408,16 @@ void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,
*/ */
assert(size % SI_CPDMA_ALIGNMENT == 0); assert(size % SI_CPDMA_ALIGNMENT == 0);
assert(address % SI_CPDMA_ALIGNMENT == 0); assert(address % SI_CPDMA_ALIGNMENT == 0);
assert(size < S_414_BYTE_COUNT_GFX6(~0u)); assert(size < S_415_BYTE_COUNT_GFX6(~0u));
uint32_t header = S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2); uint32_t header = S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
uint32_t command = S_414_BYTE_COUNT_GFX6(size); uint32_t command = S_415_BYTE_COUNT_GFX6(size);
if (sctx->chip_class >= GFX9) { if (sctx->chip_class >= GFX9) {
command |= S_414_DISABLE_WR_CONFIRM_GFX9(1); command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
header |= S_411_DST_SEL(V_411_NOWHERE); header |= S_411_DST_SEL(V_411_NOWHERE);
} else { } else {
command |= S_414_DISABLE_WR_CONFIRM_GFX6(1); command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2); header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
} }

View File

@@ -247,7 +247,7 @@ static void gfx10_emit_streamout_begin(struct si_context *sctx)
radeon_emit(cs, va >> 32); radeon_emit(cs, va >> 32);
radeon_emit(cs, 4 * i); /* destination in GDS */ radeon_emit(cs, 4 * i); /* destination in GDS */
radeon_emit(cs, 0); radeon_emit(cs, 0);
radeon_emit(cs, S_414_BYTE_COUNT_GFX9(4) | S_414_DISABLE_WR_CONFIRM_GFX9(i != last_target)); radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
} }
radeon_end(); radeon_end();