radeonsi: fix a perf regression due to slow reply from GEM_WAIT_IDLE for timeout=0
The GEM_WAIT_IDLE ioctl sometimes takes 1 ms to return even with timeout=0, which is unacceptable.
Fixes: 4194774edf - radeonsi: move barriers out of si_launch_grid_internal_ssbos
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32877>
This commit is contained in:
@@ -497,7 +497,7 @@ static bool si_is_buffer_idle(struct si_context *sctx, struct si_resource *buf,
|
||||
unsigned usage)
|
||||
{
|
||||
return !si_cs_is_buffer_referenced(sctx, buf->buf, usage) &&
|
||||
sctx->ws->buffer_wait(sctx->ws, buf->buf, 0, usage);
|
||||
sctx->ws->buffer_wait(sctx->ws, buf->buf, 0, usage | RADEON_USAGE_DISALLOW_SLOW_REPLY);
|
||||
}
|
||||
|
||||
void si_barrier_before_internal_op(struct si_context *sctx, unsigned flags,
|
||||
|
@@ -188,7 +188,12 @@ enum radeon_ctx_pstate
|
||||
#define RADEON_PRIO_SHADER_RINGS (1 << 22)
|
||||
#define RADEON_PRIO_SCRATCH_BUFFER (1 << 23)
|
||||
|
||||
#define RADEON_ALL_PRIORITIES (RADEON_USAGE_READ - 1)
|
||||
#define RADEON_ALL_PRIORITIES BITFIELD_MASK(24)
|
||||
|
||||
/* When passed to radeon_winsys::buffer_wait, it disallows using the DRM ioctl for timeout=0
|
||||
* queries because it can take ~1 ms to return, reducing FPS.
|
||||
*/
|
||||
#define RADEON_USAGE_DISALLOW_SLOW_REPLY (1 << 26)
|
||||
|
||||
/* Upper bits of priorities are used by usage flags. */
|
||||
#define RADEON_USAGE_READ (1 << 27)
|
||||
@@ -375,6 +380,13 @@ struct radeon_winsys {
|
||||
* The timeout of 0 will only return the status.
|
||||
* The timeout of OS_TIMEOUT_INFINITE will always wait until the buffer
|
||||
* is idle.
|
||||
*
|
||||
* usage is RADEON_USAGE_READ/WRITE.
|
||||
*
|
||||
* Checking whether a buffer is idle using timeout=0 can take 1 ms even if the DRM ioctl is
|
||||
* used, reducing our FPS to several hundreds. To prevent that, set
|
||||
* RADEON_USAGE_DISALLOW_SLOW_REPLY, which will return busy. This is a workaround for kernel
|
||||
* inefficiency.
|
||||
*/
|
||||
bool (*buffer_wait)(struct radeon_winsys *ws, struct pb_buffer_lean *buf,
|
||||
uint64_t timeout, unsigned usage);
|
||||
|
@@ -103,6 +103,12 @@ static bool amdgpu_bo_wait(struct radeon_winsys *rws,
|
||||
bool buffer_busy = true;
|
||||
int r;
|
||||
|
||||
/* The GEM_WAIT_IDLE ioctl with timeout=0 can take up to 1 ms to return. This is a kernel
|
||||
* inefficiency. This flag indicates whether it's better to return busy than wait for 1 ms.
|
||||
*/
|
||||
if (timeout == 0 && usage & RADEON_USAGE_DISALLOW_SLOW_REPLY)
|
||||
return false;
|
||||
|
||||
r = ac_drm_bo_wait_for_idle(aws->fd, get_real_bo(bo)->kms_handle, timeout, &buffer_busy);
|
||||
if (r)
|
||||
fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__, r);
|
||||
|
Reference in New Issue
Block a user