ac,winsys/amdgpu: align IBs the same as the kernel
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5603>
This commit is contained in:
@@ -561,6 +561,17 @@ bool ac_query_gpu_info(int fd, void *dev_p,
|
||||
info->num_rings[RING_VCN_ENC] = util_bitcount(vcn_enc.available_rings);
|
||||
info->num_rings[RING_VCN_JPEG] = util_bitcount(vcn_jpeg.available_rings);
|
||||
|
||||
/* This is "align_mask" copied from the kernel, maximums of all IP versions. */
|
||||
info->ib_pad_dw_mask[RING_GFX] = 0xff;
|
||||
info->ib_pad_dw_mask[RING_COMPUTE] = 0xff;
|
||||
info->ib_pad_dw_mask[RING_DMA] = 0xf;
|
||||
info->ib_pad_dw_mask[RING_UVD] = 0xf;
|
||||
info->ib_pad_dw_mask[RING_VCE] = 0x3f;
|
||||
info->ib_pad_dw_mask[RING_UVD_ENC] = 0x3f;
|
||||
info->ib_pad_dw_mask[RING_VCN_DEC] = 0xf;
|
||||
info->ib_pad_dw_mask[RING_VCN_ENC] = 0x3f;
|
||||
info->ib_pad_dw_mask[RING_VCN_JPEG] = 0xf;
|
||||
|
||||
/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
|
||||
* on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc.
|
||||
* SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel.
|
||||
@@ -682,7 +693,11 @@ bool ac_query_gpu_info(int fd, void *dev_p,
|
||||
/* GFX10 and maybe GFX9 need this alignment for cache coherency. */
|
||||
if (info->chip_class >= GFX9)
|
||||
ib_align = MAX2(ib_align, info->tcc_cache_line_size);
|
||||
assert(ib_align);
|
||||
/* The kernel pads gfx and compute IBs to 256 dwords since:
|
||||
* 66f3b2d527154bd258a57c8815004b5964aa1cf5
|
||||
* Do the same.
|
||||
*/
|
||||
ib_align = MAX2(ib_align, 1024);
|
||||
info->ib_alignment = ib_align;
|
||||
|
||||
if ((info->drm_minor >= 31 &&
|
||||
|
@@ -59,6 +59,7 @@ struct radeon_info {
|
||||
/* Features. */
|
||||
bool has_graphics; /* false if the chip is compute-only */
|
||||
uint32_t num_rings[NUM_RING_TYPES];
|
||||
uint32_t ib_pad_dw_mask[NUM_RING_TYPES];
|
||||
bool has_clear_state;
|
||||
bool has_distributed_tess;
|
||||
bool has_dcc_constant_encode;
|
||||
|
@@ -1097,14 +1097,16 @@ static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw,
|
||||
/* This space was originally reserved. */
|
||||
rcs->current.max_dw += cs_epilog_dw;
|
||||
|
||||
/* Pad with NOPs and add INDIRECT_BUFFER packet */
|
||||
while ((rcs->current.cdw & 7) != 4)
|
||||
/* Pad with NOPs but leave 4 dwords for INDIRECT_BUFFER. */
|
||||
uint32_t ib_pad_dw_mask = cs->ctx->ws->info.ib_pad_dw_mask[cs->ring_type];
|
||||
while ((rcs->current.cdw & ib_pad_dw_mask) != ib_pad_dw_mask - 3)
|
||||
radeon_emit(rcs, PKT3_NOP_PAD);
|
||||
|
||||
radeon_emit(rcs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
|
||||
radeon_emit(rcs, va);
|
||||
radeon_emit(rcs, va >> 32);
|
||||
new_ptr_ib_size = &rcs->current.buf[rcs->current.cdw++];
|
||||
assert((rcs->current.cdw & ib_pad_dw_mask) == 0);
|
||||
|
||||
assert((rcs->current.cdw & 7) == 0);
|
||||
assert(rcs->current.cdw <= rcs->current.max_dw);
|
||||
@@ -1664,25 +1666,28 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
|
||||
struct amdgpu_cs *cs = amdgpu_cs(rcs);
|
||||
struct amdgpu_winsys *ws = cs->ctx->ws;
|
||||
int error_code = 0;
|
||||
uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ring_type];
|
||||
|
||||
rcs->current.max_dw += amdgpu_cs_epilog_dws(cs);
|
||||
|
||||
/* Pad the IB according to the mask. */
|
||||
switch (cs->ring_type) {
|
||||
case RING_DMA:
|
||||
/* pad DMA ring to 8 DWs */
|
||||
if (ws->info.chip_class <= GFX6) {
|
||||
while (rcs->current.cdw & 7)
|
||||
while (rcs->current.cdw & ib_pad_dw_mask)
|
||||
radeon_emit(rcs, 0xf0000000); /* NOP packet */
|
||||
} else {
|
||||
while (rcs->current.cdw & ib_pad_dw_mask)
|
||||
radeon_emit(rcs, 0x00000000); /* NOP packet */
|
||||
}
|
||||
break;
|
||||
case RING_GFX:
|
||||
case RING_COMPUTE:
|
||||
/* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
|
||||
if (ws->info.gfx_ib_pad_with_type2) {
|
||||
while (rcs->current.cdw & 7)
|
||||
while (rcs->current.cdw & ib_pad_dw_mask)
|
||||
radeon_emit(rcs, PKT2_NOP_PAD);
|
||||
} else {
|
||||
while (rcs->current.cdw & 7)
|
||||
while (rcs->current.cdw & ib_pad_dw_mask)
|
||||
radeon_emit(rcs, PKT3_NOP_PAD);
|
||||
}
|
||||
if (cs->ring_type == RING_GFX)
|
||||
@@ -1690,25 +1695,25 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
|
||||
|
||||
/* Also pad secondary IBs. */
|
||||
if (cs->compute_ib.ib_mapped) {
|
||||
while (cs->compute_ib.base.current.cdw & 7)
|
||||
while (cs->compute_ib.base.current.cdw & ib_pad_dw_mask)
|
||||
radeon_emit(&cs->compute_ib.base, PKT3_NOP_PAD);
|
||||
}
|
||||
break;
|
||||
case RING_UVD:
|
||||
case RING_UVD_ENC:
|
||||
while (rcs->current.cdw & 15)
|
||||
while (rcs->current.cdw & ib_pad_dw_mask)
|
||||
radeon_emit(rcs, 0x80000000); /* type2 nop packet */
|
||||
break;
|
||||
case RING_VCN_JPEG:
|
||||
if (rcs->current.cdw % 2)
|
||||
assert(0);
|
||||
while (rcs->current.cdw & 15) {
|
||||
while (rcs->current.cdw & ib_pad_dw_mask) {
|
||||
radeon_emit(rcs, 0x60000000); /* nop packet */
|
||||
radeon_emit(rcs, 0x00000000);
|
||||
}
|
||||
break;
|
||||
case RING_VCN_DEC:
|
||||
while (rcs->current.cdw & 15)
|
||||
while (rcs->current.cdw & ib_pad_dw_mask)
|
||||
radeon_emit(rcs, 0x81ff); /* nop packet */
|
||||
break;
|
||||
default:
|
||||
|
Reference in New Issue
Block a user