diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 7bbe5143825..f01d5c1a198 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -1233,7 +1233,7 @@ static void si_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) if (unlikely(sctx->sqtt_enabled)) sctx->sqtt_next_event = EventCmdCopyImage; - if (si_compute_blit(sctx, info, NULL, true)) + if (si_compute_blit(sctx, info, NULL, 0, 0, true)) return; si_gfx_blit(ctx, info); diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index b133156b4fd..d7b40c53f14 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -972,7 +972,120 @@ bool si_compute_clear_image(struct si_context *sctx, struct pipe_resource *tex, info.mask = util_format_is_depth_or_stencil(format) ? PIPE_MASK_ZS : PIPE_MASK_RGBA; info.render_condition_enable = render_condition_enable; - return si_compute_blit(sctx, &info, color, fail_if_slow); + return si_compute_blit(sctx, &info, color, 0, 0, fail_if_slow); +} +/* Copy src_box from src to (dstx, dsty, dstz) in dst using si_compute_blit; returns its result. */ +bool si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level, + struct pipe_resource *src, unsigned src_level, unsigned dstx, + unsigned dsty, unsigned dstz, const struct pipe_box *src_box, + bool fail_if_slow) +{ + struct si_texture *ssrc = (struct si_texture*)src; + struct si_texture *sdst = (struct si_texture*)dst; + enum pipe_format src_format = util_format_linear(src->format); + enum pipe_format dst_format = util_format_linear(dst->format); + + assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format)); + + /* Without DCC, interpret equal float formats as same-size UINT to avoid NaN issues. */ + if (!vi_dcc_enabled(ssrc, src_level) && + !vi_dcc_enabled(sdst, dst_level) && + src_format == dst_format && + util_format_is_float(src_format) && + !util_format_is_compressed(src_format)) { +
switch(util_format_get_blocksizebits(src_format)) { + case 16: + src_format = dst_format = PIPE_FORMAT_R16_UINT; + break; + case 32: + src_format = dst_format = PIPE_FORMAT_R32_UINT; + break; + case 64: + src_format = dst_format = PIPE_FORMAT_R32G32_UINT; + break; + case 128: + src_format = dst_format = PIPE_FORMAT_R32G32B32A32_UINT; + break; + default: + assert(false); /* unhandled float block size */ + } + } + + /* Interpret compressed formats as UINT. */ + struct pipe_box new_box; + unsigned src_access = 0, dst_access = 0; + + /* Note that staging copies do compressed<->UINT, so one of the formats is already UINT. */ + if (util_format_is_compressed(src_format) || util_format_is_compressed(dst_format)) { + if (util_format_is_compressed(src_format)) + src_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT; + if (util_format_is_compressed(dst_format)) + dst_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT; + + dstx = util_format_get_nblocksx(dst_format, dstx); + dsty = util_format_get_nblocksy(dst_format, dsty); + + new_box.x = util_format_get_nblocksx(src_format, src_box->x); + new_box.y = util_format_get_nblocksy(src_format, src_box->y); + new_box.z = src_box->z; + new_box.width = util_format_get_nblocksx(src_format, src_box->width); + new_box.height = util_format_get_nblocksy(src_format, src_box->height); + new_box.depth = src_box->depth; + src_box = &new_box; /* continue with the converted box */ + + if (ssrc->surface.bpe == 8) + src_format = dst_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */ + else + src_format = dst_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */ + } + + if (util_format_is_subsampled_422(src_format)) { + assert(src_format == dst_format); + + src_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT; + dst_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT; + + dstx = util_format_get_nblocksx(src_format, dstx); + + src_format = dst_format = PIPE_FORMAT_R32_UINT; + + /* Interpreting 422 subsampled format (16 bpp) as 32 bpp + * should force us to divide src_box->x, dstx and width by 2.
+ * But given that ac_surface allocates this format as 32 bpp + * and that surf_size is then modified to pack the values + * we must keep the original values to get the correct results. + */ + } + + /* SNORM blitting has precision issues. Use the SINT equivalent instead, which doesn't + * force DCC decompression. + */ + if (util_format_is_snorm(dst_format)) + src_format = dst_format = util_format_snorm_to_sint(dst_format); + + struct pipe_blit_info info; + memset(&info, 0, sizeof(info)); + info.dst.resource = dst; + info.dst.level = dst_level; + info.dst.box.x = dstx; + info.dst.box.y = dsty; + info.dst.box.z = dstz; + info.dst.box.width = src_box->width; + info.dst.box.height = src_box->height; + info.dst.box.depth = src_box->depth; + info.dst.format = dst_format; + info.src.resource = src; + info.src.level = src_level; + info.src.box = *src_box; + info.src.format = src_format; + info.mask = util_format_is_depth_or_stencil(dst_format) ? PIPE_MASK_ZS : PIPE_MASK_RGBA; + + /* Only the compute blit can copy compressed and subsampled images, so don't fail if slow. */ + fail_if_slow &= !dst_access && !src_access; + + bool success = si_compute_blit(sctx, &info, NULL, dst_access, src_access, fail_if_slow); + assert((!dst_access && !src_access) || success); /* block-as-UINT copies must succeed */ + return success; } typedef struct { @@ -980,7 +1093,8 @@ typedef struct { } uvec3; bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info, - const union pipe_color_union *clear_color, bool fail_if_slow) + const union pipe_color_union *clear_color, unsigned dst_access, + unsigned src_access, bool fail_if_slow) { struct si_texture *sdst = (struct si_texture *)info->dst.resource; struct si_texture *ssrc = (struct si_texture *)info->src.resource; @@ -1019,7 +1133,8 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info, info->dst_sample != 0 || /* Image stores support DCC since GFX10. Return only for gfx queues. DCC is disabled * for compute queues farther below.
*/ - (sctx->gfx_level < GFX10 && sctx->has_graphics && vi_dcc_enabled(sdst, info->dst.level)) || + (sctx->gfx_level < GFX10 && sctx->has_graphics && vi_dcc_enabled(sdst, info->dst.level) && + !src_access && !dst_access) || info->alpha_blend || info->num_window_rectangles || info->scissor_enable || @@ -1308,7 +1423,7 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info, if (!is_clear) { image[0].resource = info->src.resource; - image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ; + image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ | src_access; image[0].format = info->src.format; image[0].u.tex.level = info->src.level; image[0].u.tex.first_layer = 0; @@ -1316,7 +1431,7 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info, } image[dst_index].resource = info->dst.resource; - image[dst_index].shader_access = image[dst_index].access = PIPE_IMAGE_ACCESS_WRITE; + image[dst_index].shader_access = image[dst_index].access = PIPE_IMAGE_ACCESS_WRITE | dst_access; image[dst_index].format = info->dst.format; image[dst_index].u.tex.level = info->dst.level; image[dst_index].u.tex.first_layer = 0; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index c0705fc3464..53c5cb2d03d 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1517,8 +1517,13 @@ bool si_compute_clear_image(struct si_context *sctx, struct pipe_resource *tex, enum pipe_format format, unsigned level, const struct pipe_box *box, const union pipe_color_union *color, bool render_condition_enable, bool fail_if_slow); +bool si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level, + struct pipe_resource *src, unsigned src_level, unsigned dstx, + unsigned dsty, unsigned dstz, const struct pipe_box *src_box, + bool fail_if_slow); bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
- const union pipe_color_union *color, bool fail_if_slow); + const union pipe_color_union *clear_color, unsigned dst_access, + unsigned src_access, bool fail_if_slow); void si_init_compute_blit_functions(struct si_context *sctx); /* si_cp_dma.c */ diff --git a/src/gallium/drivers/radeonsi/si_test_image_copy_region.c b/src/gallium/drivers/radeonsi/si_test_image_copy_region.c index 5c5c2a9f47c..49c3f170a3c 100644 --- a/src/gallium/drivers/radeonsi/si_test_image_copy_region.c +++ b/src/gallium/drivers/radeonsi/si_test_image_copy_region.c @@ -929,7 +929,7 @@ void si_test_blit(struct si_screen *sscreen, unsigned test_flags) if (only_cb_resolve) success = si_msaa_resolve_blit_via_CB(ctx, &info); else - success = si_compute_blit(sctx, &info, NULL, false); + success = si_compute_blit(sctx, &info, NULL, 0, 0, false); if (success) { printf(" %-7s", only_cb_resolve ? "resolve" : "comp");