amd: fix parsing the last dword of DMA_DATA packets
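The IB parser was decoding the last dword of the packet as SQ_WAVE_GPR_ALLOC instead of COMMAND, because COMMAND was mapped at offset 0x414, which conflicts with SQ_WAVE_GPR_ALLOC. Move COMMAND to the odd offset 0x415 to work around the conflict. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9795>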
This commit is contained in:
@@ -394,7 +394,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
|||||||
ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
|
ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
|
||||||
ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
|
ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
|
||||||
ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
|
ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
|
||||||
ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
|
ac_dump_reg(f, ib->chip_class, R_415_COMMAND, ac_ib_get(ib), ~0);
|
||||||
break;
|
break;
|
||||||
case PKT3_DMA_DATA:
|
case PKT3_DMA_DATA:
|
||||||
ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
|
ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
|
||||||
@@ -402,7 +402,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
|||||||
ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
|
ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
|
||||||
ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
|
ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
|
||||||
ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
|
ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
|
||||||
ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
|
ac_dump_reg(f, ib->chip_class, R_415_COMMAND, ac_ib_get(ib), ~0);
|
||||||
break;
|
break;
|
||||||
case PKT3_INDIRECT_BUFFER_SI:
|
case PKT3_INDIRECT_BUFFER_SI:
|
||||||
case PKT3_INDIRECT_BUFFER_CONST:
|
case PKT3_INDIRECT_BUFFER_CONST:
|
||||||
|
@@ -122,14 +122,15 @@
|
|||||||
},
|
},
|
||||||
"register_mappings": [
|
"register_mappings": [
|
||||||
{
|
{
|
||||||
|
"comment": "This is at offset 0x415 instead of 0x414 due to a conflict with SQ_WAVE_GPR_ALLOC",
|
||||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81"],
|
"chips": ["gfx6", "gfx7", "gfx8", "gfx81"],
|
||||||
"map": {"at": 1044, "to": "pkt3"},
|
"map": {"at": 1045, "to": "pkt3"},
|
||||||
"name": "COMMAND",
|
"name": "COMMAND",
|
||||||
"type_ref": "COMMAND"
|
"type_ref": "COMMAND"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"chips": ["gfx9", "gfx10", "gfx103"],
|
"chips": ["gfx9", "gfx10", "gfx103"],
|
||||||
"map": {"at": 1044, "to": "pkt3"},
|
"map": {"at": 1045, "to": "pkt3"},
|
||||||
"name": "COMMAND",
|
"name": "COMMAND",
|
||||||
"type_ref": "COMMAND_gfx9"
|
"type_ref": "COMMAND_gfx9"
|
||||||
},
|
},
|
||||||
|
@@ -7128,8 +7128,8 @@ gfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
radeon_emit(cs, va >> 32);
|
radeon_emit(cs, va >> 32);
|
||||||
radeon_emit(cs, 4 * i); /* destination in GDS */
|
radeon_emit(cs, 4 * i); /* destination in GDS */
|
||||||
radeon_emit(cs, 0);
|
radeon_emit(cs, 0);
|
||||||
radeon_emit(cs, S_414_BYTE_COUNT_GFX9(4) |
|
radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) |
|
||||||
S_414_DISABLE_WR_CONFIRM_GFX9(i != last_target));
|
S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
|
||||||
}
|
}
|
||||||
|
|
||||||
radv_set_streamout_enable(cmd_buffer, true);
|
radv_set_streamout_enable(cmd_buffer, true);
|
||||||
|
@@ -1647,8 +1647,8 @@ si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
static inline unsigned cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer)
|
static inline unsigned cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer)
|
||||||
{
|
{
|
||||||
unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 ?
|
unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 ?
|
||||||
S_414_BYTE_COUNT_GFX9(~0u) :
|
S_415_BYTE_COUNT_GFX9(~0u) :
|
||||||
S_414_BYTE_COUNT_GFX6(~0u);
|
S_415_BYTE_COUNT_GFX6(~0u);
|
||||||
|
|
||||||
/* make it aligned for optimal performance */
|
/* make it aligned for optimal performance */
|
||||||
return max & ~(SI_CPDMA_ALIGNMENT - 1);
|
return max & ~(SI_CPDMA_ALIGNMENT - 1);
|
||||||
@@ -1669,22 +1669,22 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
|
|
||||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
|
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
|
||||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
|
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
|
||||||
command |= S_414_BYTE_COUNT_GFX9(size);
|
command |= S_415_BYTE_COUNT_GFX9(size);
|
||||||
else
|
else
|
||||||
command |= S_414_BYTE_COUNT_GFX6(size);
|
command |= S_415_BYTE_COUNT_GFX6(size);
|
||||||
|
|
||||||
/* Sync flags. */
|
/* Sync flags. */
|
||||||
if (flags & CP_DMA_SYNC)
|
if (flags & CP_DMA_SYNC)
|
||||||
header |= S_411_CP_SYNC(1);
|
header |= S_411_CP_SYNC(1);
|
||||||
else {
|
else {
|
||||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
|
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
|
||||||
command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
|
command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
|
||||||
else
|
else
|
||||||
command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
|
command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & CP_DMA_RAW_WAIT)
|
if (flags & CP_DMA_RAW_WAIT)
|
||||||
command |= S_414_RAW_WAIT(1);
|
command |= S_415_RAW_WAIT(1);
|
||||||
|
|
||||||
/* Src and dst flags. */
|
/* Src and dst flags. */
|
||||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
|
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||||
|
@@ -43,7 +43,7 @@
|
|||||||
static inline unsigned cp_dma_max_byte_count(struct si_context *sctx)
|
static inline unsigned cp_dma_max_byte_count(struct si_context *sctx)
|
||||||
{
|
{
|
||||||
unsigned max =
|
unsigned max =
|
||||||
sctx->chip_class >= GFX9 ? S_414_BYTE_COUNT_GFX9(~0u) : S_414_BYTE_COUNT_GFX6(~0u);
|
sctx->chip_class >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u) : S_415_BYTE_COUNT_GFX6(~0u);
|
||||||
|
|
||||||
/* make it aligned for optimal performance */
|
/* make it aligned for optimal performance */
|
||||||
return max & ~(SI_CPDMA_ALIGNMENT - 1);
|
return max & ~(SI_CPDMA_ALIGNMENT - 1);
|
||||||
@@ -63,16 +63,16 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
|
|||||||
assert(sctx->chip_class != GFX6 || cache_policy == L2_BYPASS);
|
assert(sctx->chip_class != GFX6 || cache_policy == L2_BYPASS);
|
||||||
|
|
||||||
if (sctx->chip_class >= GFX9)
|
if (sctx->chip_class >= GFX9)
|
||||||
command |= S_414_BYTE_COUNT_GFX9(size);
|
command |= S_415_BYTE_COUNT_GFX9(size);
|
||||||
else
|
else
|
||||||
command |= S_414_BYTE_COUNT_GFX6(size);
|
command |= S_415_BYTE_COUNT_GFX6(size);
|
||||||
|
|
||||||
/* Sync flags. */
|
/* Sync flags. */
|
||||||
if (flags & CP_DMA_SYNC)
|
if (flags & CP_DMA_SYNC)
|
||||||
header |= S_411_CP_SYNC(1);
|
header |= S_411_CP_SYNC(1);
|
||||||
|
|
||||||
if (flags & CP_DMA_RAW_WAIT)
|
if (flags & CP_DMA_RAW_WAIT)
|
||||||
command |= S_414_RAW_WAIT(1);
|
command |= S_415_RAW_WAIT(1);
|
||||||
|
|
||||||
/* Src and dst flags. */
|
/* Src and dst flags. */
|
||||||
if (sctx->chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va) {
|
if (sctx->chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va) {
|
||||||
@@ -80,7 +80,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
|
|||||||
} else if (flags & CP_DMA_DST_IS_GDS) {
|
} else if (flags & CP_DMA_DST_IS_GDS) {
|
||||||
header |= S_411_DST_SEL(V_411_GDS);
|
header |= S_411_DST_SEL(V_411_GDS);
|
||||||
/* GDS increments the address, not CP. */
|
/* GDS increments the address, not CP. */
|
||||||
command |= S_414_DAS(V_414_REGISTER) | S_414_DAIC(V_414_NO_INCREMENT);
|
command |= S_415_DAS(V_415_REGISTER) | S_415_DAIC(V_415_NO_INCREMENT);
|
||||||
} else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) {
|
} else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) {
|
||||||
header |=
|
header |=
|
||||||
S_411_DST_SEL(V_411_DST_ADDR_TC_L2) | S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM);
|
S_411_DST_SEL(V_411_DST_ADDR_TC_L2) | S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM);
|
||||||
@@ -91,7 +91,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
|
|||||||
} else if (flags & CP_DMA_SRC_IS_GDS) {
|
} else if (flags & CP_DMA_SRC_IS_GDS) {
|
||||||
header |= S_411_SRC_SEL(V_411_GDS);
|
header |= S_411_SRC_SEL(V_411_GDS);
|
||||||
/* Both of these are required for GDS. It does increment the address. */
|
/* Both of these are required for GDS. It does increment the address. */
|
||||||
command |= S_414_SAS(V_414_REGISTER) | S_414_SAIC(V_414_NO_INCREMENT);
|
command |= S_415_SAS(V_415_REGISTER) | S_415_SAIC(V_415_NO_INCREMENT);
|
||||||
} else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) {
|
} else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) {
|
||||||
header |=
|
header |=
|
||||||
S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) | S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
|
S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) | S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
|
||||||
@@ -408,16 +408,16 @@ void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,
|
|||||||
*/
|
*/
|
||||||
assert(size % SI_CPDMA_ALIGNMENT == 0);
|
assert(size % SI_CPDMA_ALIGNMENT == 0);
|
||||||
assert(address % SI_CPDMA_ALIGNMENT == 0);
|
assert(address % SI_CPDMA_ALIGNMENT == 0);
|
||||||
assert(size < S_414_BYTE_COUNT_GFX6(~0u));
|
assert(size < S_415_BYTE_COUNT_GFX6(~0u));
|
||||||
|
|
||||||
uint32_t header = S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
|
uint32_t header = S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
|
||||||
uint32_t command = S_414_BYTE_COUNT_GFX6(size);
|
uint32_t command = S_415_BYTE_COUNT_GFX6(size);
|
||||||
|
|
||||||
if (sctx->chip_class >= GFX9) {
|
if (sctx->chip_class >= GFX9) {
|
||||||
command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
|
command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
|
||||||
header |= S_411_DST_SEL(V_411_NOWHERE);
|
header |= S_411_DST_SEL(V_411_NOWHERE);
|
||||||
} else {
|
} else {
|
||||||
command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
|
command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
|
||||||
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
|
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -247,7 +247,7 @@ static void gfx10_emit_streamout_begin(struct si_context *sctx)
|
|||||||
radeon_emit(cs, va >> 32);
|
radeon_emit(cs, va >> 32);
|
||||||
radeon_emit(cs, 4 * i); /* destination in GDS */
|
radeon_emit(cs, 4 * i); /* destination in GDS */
|
||||||
radeon_emit(cs, 0);
|
radeon_emit(cs, 0);
|
||||||
radeon_emit(cs, S_414_BYTE_COUNT_GFX9(4) | S_414_DISABLE_WR_CONFIRM_GFX9(i != last_target));
|
radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
|
||||||
}
|
}
|
||||||
radeon_end();
|
radeon_end();
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user