radeonsi: add a new version of si_compute_copy_image using the compute blit
It's faster and handles more stuff. This is mostly the same code as the old version, but it calls si_compute_blit at the end. A later commit will remove the old version, so that there is no code duplication. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28917>
This commit is contained in:
@@ -1233,7 +1233,7 @@ static void si_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
|
|||||||
if (unlikely(sctx->sqtt_enabled))
|
if (unlikely(sctx->sqtt_enabled))
|
||||||
sctx->sqtt_next_event = EventCmdCopyImage;
|
sctx->sqtt_next_event = EventCmdCopyImage;
|
||||||
|
|
||||||
if (si_compute_blit(sctx, info, NULL, true))
|
if (si_compute_blit(sctx, info, NULL, 0, 0, true))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
si_gfx_blit(ctx, info);
|
si_gfx_blit(ctx, info);
|
||||||
|
@@ -972,7 +972,120 @@ bool si_compute_clear_image(struct si_context *sctx, struct pipe_resource *tex,
|
|||||||
info.mask = util_format_is_depth_or_stencil(format) ? PIPE_MASK_ZS : PIPE_MASK_RGBA;
|
info.mask = util_format_is_depth_or_stencil(format) ? PIPE_MASK_ZS : PIPE_MASK_RGBA;
|
||||||
info.render_condition_enable = render_condition_enable;
|
info.render_condition_enable = render_condition_enable;
|
||||||
|
|
||||||
return si_compute_blit(sctx, &info, color, fail_if_slow);
|
return si_compute_blit(sctx, &info, color, 0, 0, fail_if_slow);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level,
|
||||||
|
struct pipe_resource *src, unsigned src_level, unsigned dstx,
|
||||||
|
unsigned dsty, unsigned dstz, const struct pipe_box *src_box,
|
||||||
|
bool fail_if_slow)
|
||||||
|
{
|
||||||
|
struct si_texture *ssrc = (struct si_texture*)src;
|
||||||
|
struct si_texture *sdst = (struct si_texture*)dst;
|
||||||
|
enum pipe_format src_format = util_format_linear(src->format);
|
||||||
|
enum pipe_format dst_format = util_format_linear(dst->format);
|
||||||
|
|
||||||
|
assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format));
|
||||||
|
|
||||||
|
/* Interpret as integer values to avoid NaN issues */
|
||||||
|
if (!vi_dcc_enabled(ssrc, src_level) &&
|
||||||
|
!vi_dcc_enabled(sdst, dst_level) &&
|
||||||
|
src_format == dst_format &&
|
||||||
|
util_format_is_float(src_format) &&
|
||||||
|
!util_format_is_compressed(src_format)) {
|
||||||
|
switch(util_format_get_blocksizebits(src_format)) {
|
||||||
|
case 16:
|
||||||
|
src_format = dst_format = PIPE_FORMAT_R16_UINT;
|
||||||
|
break;
|
||||||
|
case 32:
|
||||||
|
src_format = dst_format = PIPE_FORMAT_R32_UINT;
|
||||||
|
break;
|
||||||
|
case 64:
|
||||||
|
src_format = dst_format = PIPE_FORMAT_R32G32_UINT;
|
||||||
|
break;
|
||||||
|
case 128:
|
||||||
|
src_format = dst_format = PIPE_FORMAT_R32G32B32A32_UINT;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Interpret compressed formats as UINT. */
|
||||||
|
struct pipe_box new_box;
|
||||||
|
unsigned src_access = 0, dst_access = 0;
|
||||||
|
|
||||||
|
/* Note that staging copies do compressed<->UINT, so one of the formats is already UINT. */
|
||||||
|
if (util_format_is_compressed(src_format) || util_format_is_compressed(dst_format)) {
|
||||||
|
if (util_format_is_compressed(src_format))
|
||||||
|
src_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
|
||||||
|
if (util_format_is_compressed(dst_format))
|
||||||
|
dst_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
|
||||||
|
|
||||||
|
dstx = util_format_get_nblocksx(dst_format, dstx);
|
||||||
|
dsty = util_format_get_nblocksy(dst_format, dsty);
|
||||||
|
|
||||||
|
new_box.x = util_format_get_nblocksx(src_format, src_box->x);
|
||||||
|
new_box.y = util_format_get_nblocksy(src_format, src_box->y);
|
||||||
|
new_box.z = src_box->z;
|
||||||
|
new_box.width = util_format_get_nblocksx(src_format, src_box->width);
|
||||||
|
new_box.height = util_format_get_nblocksy(src_format, src_box->height);
|
||||||
|
new_box.depth = src_box->depth;
|
||||||
|
src_box = &new_box;
|
||||||
|
|
||||||
|
if (ssrc->surface.bpe == 8)
|
||||||
|
src_format = dst_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
|
||||||
|
else
|
||||||
|
src_format = dst_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (util_format_is_subsampled_422(src_format)) {
|
||||||
|
assert(src_format == dst_format);
|
||||||
|
|
||||||
|
src_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
|
||||||
|
dst_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
|
||||||
|
|
||||||
|
dstx = util_format_get_nblocksx(src_format, dstx);
|
||||||
|
|
||||||
|
src_format = dst_format = PIPE_FORMAT_R32_UINT;
|
||||||
|
|
||||||
|
/* Interpreting 422 subsampled format (16 bpp) as 32 bpp
|
||||||
|
* should force us to divide src_box->x, dstx and width by 2.
|
||||||
|
* But given that ac_surface allocates this format as 32 bpp
|
||||||
|
* and that surf_size is then modified to pack the values
|
||||||
|
* we must keep the original values to get the correct results.
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
/* SNORM blitting has precision issues. Use the SINT equivalent instead, which doesn't
|
||||||
|
* force DCC decompression.
|
||||||
|
*/
|
||||||
|
if (util_format_is_snorm(dst_format))
|
||||||
|
src_format = dst_format = util_format_snorm_to_sint(dst_format);
|
||||||
|
|
||||||
|
struct pipe_blit_info info;
|
||||||
|
memset(&info, 0, sizeof(info));
|
||||||
|
info.dst.resource = dst;
|
||||||
|
info.dst.level = dst_level;
|
||||||
|
info.dst.box.x = dstx;
|
||||||
|
info.dst.box.y = dsty;
|
||||||
|
info.dst.box.z = dstz;
|
||||||
|
info.dst.box.width = src_box->width;
|
||||||
|
info.dst.box.height = src_box->height;
|
||||||
|
info.dst.box.depth = src_box->depth;
|
||||||
|
info.dst.format = dst_format;
|
||||||
|
info.src.resource = src;
|
||||||
|
info.src.level = src_level;
|
||||||
|
info.src.box = *src_box;
|
||||||
|
info.src.format = src_format;
|
||||||
|
info.mask = util_format_is_depth_or_stencil(dst_format) ? PIPE_MASK_ZS : PIPE_MASK_RGBA;
|
||||||
|
|
||||||
|
/* Only the compute blit can copy compressed and subsampled images. */
|
||||||
|
fail_if_slow &= !dst_access && !src_access;
|
||||||
|
|
||||||
|
bool success = si_compute_blit(sctx, &info, NULL, dst_access, src_access, fail_if_slow);
|
||||||
|
assert((!dst_access && !src_access) || success);
|
||||||
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -980,7 +1093,8 @@ typedef struct {
|
|||||||
} uvec3;
|
} uvec3;
|
||||||
|
|
||||||
bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
|
bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
|
||||||
const union pipe_color_union *clear_color, bool fail_if_slow)
|
const union pipe_color_union *clear_color, unsigned dst_access,
|
||||||
|
unsigned src_access, bool fail_if_slow)
|
||||||
{
|
{
|
||||||
struct si_texture *sdst = (struct si_texture *)info->dst.resource;
|
struct si_texture *sdst = (struct si_texture *)info->dst.resource;
|
||||||
struct si_texture *ssrc = (struct si_texture *)info->src.resource;
|
struct si_texture *ssrc = (struct si_texture *)info->src.resource;
|
||||||
@@ -1019,7 +1133,8 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
|
|||||||
info->dst_sample != 0 ||
|
info->dst_sample != 0 ||
|
||||||
/* Image stores support DCC since GFX10. Return only for gfx queues. DCC is disabled
|
/* Image stores support DCC since GFX10. Return only for gfx queues. DCC is disabled
|
||||||
* for compute queues farther below. */
|
* for compute queues farther below. */
|
||||||
(sctx->gfx_level < GFX10 && sctx->has_graphics && vi_dcc_enabled(sdst, info->dst.level)) ||
|
(sctx->gfx_level < GFX10 && sctx->has_graphics && vi_dcc_enabled(sdst, info->dst.level) &&
|
||||||
|
!src_access && !dst_access) ||
|
||||||
info->alpha_blend ||
|
info->alpha_blend ||
|
||||||
info->num_window_rectangles ||
|
info->num_window_rectangles ||
|
||||||
info->scissor_enable ||
|
info->scissor_enable ||
|
||||||
@@ -1308,7 +1423,7 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
|
|||||||
|
|
||||||
if (!is_clear) {
|
if (!is_clear) {
|
||||||
image[0].resource = info->src.resource;
|
image[0].resource = info->src.resource;
|
||||||
image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ;
|
image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ | src_access;
|
||||||
image[0].format = info->src.format;
|
image[0].format = info->src.format;
|
||||||
image[0].u.tex.level = info->src.level;
|
image[0].u.tex.level = info->src.level;
|
||||||
image[0].u.tex.first_layer = 0;
|
image[0].u.tex.first_layer = 0;
|
||||||
@@ -1316,7 +1431,7 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
image[dst_index].resource = info->dst.resource;
|
image[dst_index].resource = info->dst.resource;
|
||||||
image[dst_index].shader_access = image[dst_index].access = PIPE_IMAGE_ACCESS_WRITE;
|
image[dst_index].shader_access = image[dst_index].access = PIPE_IMAGE_ACCESS_WRITE | dst_access;
|
||||||
image[dst_index].format = info->dst.format;
|
image[dst_index].format = info->dst.format;
|
||||||
image[dst_index].u.tex.level = info->dst.level;
|
image[dst_index].u.tex.level = info->dst.level;
|
||||||
image[dst_index].u.tex.first_layer = 0;
|
image[dst_index].u.tex.first_layer = 0;
|
||||||
|
@@ -1517,8 +1517,13 @@ bool si_compute_clear_image(struct si_context *sctx, struct pipe_resource *tex,
|
|||||||
enum pipe_format format, unsigned level, const struct pipe_box *box,
|
enum pipe_format format, unsigned level, const struct pipe_box *box,
|
||||||
const union pipe_color_union *color, bool render_condition_enable,
|
const union pipe_color_union *color, bool render_condition_enable,
|
||||||
bool fail_if_slow);
|
bool fail_if_slow);
|
||||||
|
bool si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level,
|
||||||
|
struct pipe_resource *src, unsigned src_level, unsigned dstx,
|
||||||
|
unsigned dsty, unsigned dstz, const struct pipe_box *src_box,
|
||||||
|
bool fail_if_slow);
|
||||||
bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
|
bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
|
||||||
const union pipe_color_union *color, bool fail_if_slow);
|
const union pipe_color_union *clear_color, unsigned dst_access,
|
||||||
|
unsigned src_access, bool fail_if_slow);
|
||||||
void si_init_compute_blit_functions(struct si_context *sctx);
|
void si_init_compute_blit_functions(struct si_context *sctx);
|
||||||
|
|
||||||
/* si_cp_dma.c */
|
/* si_cp_dma.c */
|
||||||
|
@@ -929,7 +929,7 @@ void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
|
|||||||
if (only_cb_resolve)
|
if (only_cb_resolve)
|
||||||
success = si_msaa_resolve_blit_via_CB(ctx, &info);
|
success = si_msaa_resolve_blit_via_CB(ctx, &info);
|
||||||
else
|
else
|
||||||
success = si_compute_blit(sctx, &info, NULL, false);
|
success = si_compute_blit(sctx, &info, NULL, 0, 0, false);
|
||||||
|
|
||||||
if (success) {
|
if (success) {
|
||||||
printf(" %-7s", only_cb_resolve ? "resolve" : "comp");
|
printf(" %-7s", only_cb_resolve ? "resolve" : "comp");
|
||||||
|
Reference in New Issue
Block a user