radv: prefer CP DMA for GTT buffer copies/clears on dGPUs due to slow PCIe
The CP DMA bandwidth is always better than PCIe, so I think wasting compute resources is not a good idea. This is only enabled on GFX10+ because untested on older gens and also because RadeonSI does that. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8763>
This commit is contained in:

committed by
Marge Bot

parent
f5a02d16a6
commit
e870796113
@@ -351,17 +351,38 @@ static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
|
||||
radv_meta_restore(&saved_state, cmd_buffer);
|
||||
}
|
||||
|
||||
static bool
|
||||
radv_prefer_compute_dma(const struct radv_device *device,
|
||||
uint64_t size,
|
||||
struct radeon_winsys_bo *src_bo,
|
||||
struct radeon_winsys_bo *dst_bo)
|
||||
{
|
||||
bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10 &&
|
||||
device->physical_device->rad_info.has_dedicated_vram) {
|
||||
if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
|
||||
!(dst_bo->initial_domain & RADEON_DOMAIN_VRAM)) {
|
||||
/* Prefer CP DMA for GTT on dGPUS due to slow PCIe. */
|
||||
use_compute = false;
|
||||
}
|
||||
}
|
||||
|
||||
return use_compute;
|
||||
}
|
||||
|
||||
uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
|
||||
const struct radv_image *image,
|
||||
struct radeon_winsys_bo *bo,
|
||||
uint64_t offset, uint64_t size, uint32_t value)
|
||||
{
|
||||
bool use_compute = radv_prefer_compute_dma(cmd_buffer->device, size, NULL, bo);
|
||||
uint32_t flush_bits = 0;
|
||||
|
||||
assert(!(offset & 3));
|
||||
assert(!(size & 3));
|
||||
|
||||
if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
|
||||
if (use_compute) {
|
||||
cmd_buffer->state.flush_bits |=
|
||||
radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
|
||||
|
||||
@@ -387,7 +408,10 @@ void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
|
||||
uint64_t src_offset, uint64_t dst_offset,
|
||||
uint64_t size)
|
||||
{
|
||||
if (size >= RADV_BUFFER_OPS_CS_THRESHOLD && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
|
||||
bool use_compute = !(size & 3) && !(src_offset & 3) && !(dst_offset & 3) &&
|
||||
radv_prefer_compute_dma(cmd_buffer->device, size, src_bo, dst_bo);
|
||||
|
||||
if (use_compute)
|
||||
copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
|
||||
src_offset, dst_offset, size);
|
||||
else if (size) {
|
||||
|
Reference in New Issue
Block a user