From f96bbb64d692299705eff26a9eb8b0bd48c919f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 16 Apr 2024 15:19:27 -0400 Subject: [PATCH] radeonsi: add decision code to select when to use compute blit for performance Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/ci/radeonsi-raven-fails.txt | 4 - .../drivers/radeonsi/si_compute_blit.c | 104 ++++++++++++++++-- 2 files changed, 96 insertions(+), 12 deletions(-) diff --git a/src/amd/ci/radeonsi-raven-fails.txt b/src/amd/ci/radeonsi-raven-fails.txt index 8831eb45feb..7a6eaeadc2d 100644 --- a/src/amd/ci/radeonsi-raven-fails.txt +++ b/src/amd/ci/radeonsi-raven-fails.txt @@ -58,12 +58,8 @@ spec@glsl-4.00@execution@conversion@vert-conversion-explicit-dmat4x3-mat4x3,Fail spec@glsl-4.00@execution@conversion@vert-conversion-explicit-dvec4-vec4,Fail spec@glsl-es-3.00@execution@built-in-functions@fs-packhalf2x16,Fail spec@glsl-es-3.00@execution@built-in-functions@vs-packhalf2x16,Fail -spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail -spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp,Fail -spec@khr_texture_compression_astc@sliced-3d-miptree-gl srgb-fp@sRGB decode full precision,Fail spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index a06fd52ae4b..817f9c95de9 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -915,12 +915,14 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info, { struct si_texture *sdst = (struct 
si_texture *)info->dst.resource; struct si_texture *ssrc = (struct si_texture *)info->src.resource; + bool is_2d_tiling = !sdst->surface.is_linear && !sdst->surface.thick_tiling; bool is_3d_tiling = sdst->surface.thick_tiling; bool is_clear = !info->src.resource; unsigned dst_samples = MAX2(1, sdst->buffer.b.b.nr_samples); unsigned src_samples = is_clear ? 1 : MAX2(1, ssrc->buffer.b.b.nr_samples); bool is_resolve = !is_clear && dst_samples == 1 && src_samples >= 2 && !util_format_is_pure_integer(info->dst.format); + bool is_upsampling = !is_clear && src_samples == 1 && dst_samples >= 2; bool sample0_only = src_samples >= 2 && dst_samples == 1 && (info->sample0_only || util_format_is_pure_integer(info->dst.format)); /* Get the channel sizes. */ @@ -967,14 +969,100 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info, src_samples > SI_MAX_COMPUTE_BLIT_SAMPLES))) return false; - /* Testing on Navi21 showed that the compute blit is slightly slower than the gfx blit. - * The compute blit is even slower with DCC stores. VP13 CATIA_plane_pencil is a good test - * for that because it's mostly just blits. - * - * TODO: benchmark the performance on gfx11 - */ - if (sctx->gfx_level < GFX11 && sctx->has_graphics && flags & SI_OP_FAIL_IF_SLOW) - return false; + /* Return a failure if a compute blit is slower than a gfx blit. */ + if (sctx->has_graphics && flags & SI_OP_FAIL_IF_SLOW) { + if (is_clear) { + /* Verified on: Tahiti, Hawaii, Tonga, Vega10, Navi10, Navi21, Navi31 */ + if (is_3d_tiling) { + if (sctx->gfx_level == GFX6 && sdst->surface.bpe == 8) + return false; + } else if (is_2d_tiling) { + if (!(sctx->gfx_level == GFX6 && sdst->surface.bpe <= 4 && dst_samples == 1) && + !(sctx->gfx_level == GFX7 && sdst->surface.bpe == 1 && dst_samples == 1)) + return false; + } + } else { + /* For upsampling, image stores don't compress MSAA as well as draws. 
*/ + if (is_upsampling) + return false; + + switch (sctx->gfx_level) { + case GFX6: + case GFX7: + case GFX8: + case GFX9: + case GFX10: + case GFX10_3: + /* Verified on: Tahiti, Hawaii, Tonga, Vega10, Navi10, Navi21 */ + if (is_resolve) { + if (!(sctx->gfx_level == GFX7 && sdst->surface.bpe == 16)) + return false; + } else { + assert(dst_samples == src_samples || sample0_only); + + if (is_2d_tiling) { + if (dst_samples == 1) { + if (sdst->surface.bpe <= 8 && + !(sctx->gfx_level <= GFX7 && sdst->surface.bpe == 1) && + !(sctx->gfx_level == GFX6 && sdst->surface.bpe == 2 && + ssrc->surface.is_linear) && + !(sctx->gfx_level == GFX7 && sdst->surface.bpe >= 2 && + ssrc->surface.is_linear) && + !((sctx->gfx_level == GFX8 || sctx->gfx_level == GFX9) && + sdst->surface.bpe >= 2 && ssrc->surface.is_linear) && + !(sctx->gfx_level == GFX10 && sdst->surface.bpe <= 2 && + ssrc->surface.is_linear) && + !(sctx->gfx_level == GFX10_3 && sdst->surface.bpe == 8 && + ssrc->surface.is_linear)) + return false; + + if (sctx->gfx_level == GFX6 && sdst->surface.bpe == 16 && + ssrc->surface.is_linear && sdst->buffer.b.b.target != PIPE_TEXTURE_3D) + return false; + + if (sdst->surface.bpe == 16 && !ssrc->surface.is_linear && + /* Only GFX6 selects 2D tiling for 128bpp 3D textures. */ + !(sctx->gfx_level == GFX6 && sdst->buffer.b.b.target == PIPE_TEXTURE_3D) && + sctx->gfx_level != GFX7) + return false; + } else { + /* MSAA copies - tested only without FMASK on Navi21. */ + if (sdst->surface.bpe >= 4) + return false; + } + } + } + break; + + case GFX11: + case GFX11_5: + default: + /* Verified on Navi31. 
*/ + if (is_resolve) { + if (!((sdst->surface.bpe <= 2 && src_samples == 2) || + (sdst->surface.bpe == 16 && src_samples == 4))) + return false; + } else { + assert(dst_samples == src_samples || sample0_only); + + if (is_2d_tiling) { + if (sdst->surface.bpe == 2 && ssrc->surface.is_linear && dst_samples == 1) + return false; + + if ((sdst->surface.bpe == 4 || sdst->surface.bpe == 8) && dst_samples == 1) + return false; + + if (sdst->surface.bpe == 16 && dst_samples == 1 && !ssrc->surface.is_linear) + return false; + + if (sdst->surface.bpe == 16 && dst_samples == 8) + return false; + } + } + break; + } + } + } if (sctx->gfx_level < GFX10 && !sctx->has_graphics && vi_dcc_enabled(sdst, info->dst.level)) si_texture_disable_dcc(sctx, sdst);