radeonsi: add support for displayable DCC for multi-RB chips
A compute shader is used to reorder DCC data from aligned to unaligned.
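For orientation, the patch builds a per-surface retile map of <aligned offset, unaligned offset> byte pairs (in gfx9_compute_miptree) and then replays that map with a small compute shader (si_create_dcc_retile_cs, dispatched from si_retile_dcc). Below is a minimal CPU-side sketch of that replay step, not driver code; the function and parameter names are made up for illustration only:

#include <stdint.h>

/* Sketch: copy DCC bytes from the RB/pipe-aligned layout into the
 * unaligned (displayable) layout using the retile map, which stores
 * num_elements offsets as consecutive <src, dst> pairs. */
static void retile_dcc_on_cpu(const uint32_t *retile_map, unsigned num_elements,
                              const uint8_t *aligned_dcc, uint8_t *display_dcc)
{
	for (unsigned i = 0; i < num_elements; i += 2)
		display_dcc[retile_map[i + 1]] = aligned_dcc[retile_map[i]];
}

The real shader does the same copy, except each thread loads two offset pairs from a typed buffer view of the map and moves the DCC bytes through r8_uint image loads/stores (see si_create_dcc_retile_cs in the diff).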
@@ -504,6 +504,7 @@ void ac_print_gpu_info(struct radeon_info *info)
	printf(" tcc_cache_line_size = %u\n", info->tcc_cache_line_size);

	printf(" use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned);
	printf(" use_display_dcc_with_retile_blit = %u\n", info->use_display_dcc_with_retile_blit);

	printf("Memory info:\n");
	printf(" pte_fragment_size = %u\n", info->pte_fragment_size);

@@ -56,8 +56,11 @@ struct radeon_info {
	uint32_t clock_crystal_freq;
	uint32_t tcc_cache_line_size;

	/* There are 2 display DCC codepaths, because display expects unaligned DCC. */
	/* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */
	bool use_display_dcc_unaligned;
	/* Allocate both aligned and unaligned DCC and use the retile blit. */
	bool use_display_dcc_with_retile_blit;

	/* Memory info. */
	uint32_t pte_fragment_size;

@@ -1079,6 +1079,7 @@ gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib,
}

static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
				const struct radeon_info *info,
				const struct ac_surf_config *config,
				struct radeon_surf *surf, bool compressed,
				ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)

@@ -1218,7 +1219,6 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,

		surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
		surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
		surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;
		surf->dcc_size = dout.dccRamSize;
		surf->dcc_alignment = dout.dccRamBaseAlign;
		surf->num_dcc_levels = in->numMipLevels;

@@ -1254,6 +1254,106 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,

		if (!surf->num_dcc_levels)
			surf->dcc_size = 0;

		surf->u.gfx9.display_dcc_size = surf->dcc_size;
		surf->u.gfx9.display_dcc_alignment = surf->dcc_alignment;
		surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;

		/* Compute displayable DCC. */
		if (in->flags.display &&
		    surf->num_dcc_levels &&
		    info->use_display_dcc_with_retile_blit) {
			/* Compute displayable DCC info. */
			din.dccKeyFlags.pipeAligned = 0;
			din.dccKeyFlags.rbAligned = 0;

			assert(din.numSlices == 1);
			assert(din.numMipLevels == 1);
			assert(din.numFrags == 1);
			assert(surf->tile_swizzle == 0);
			assert(surf->u.gfx9.dcc.pipe_aligned ||
			       surf->u.gfx9.dcc.rb_aligned);

			ret = Addr2ComputeDccInfo(addrlib, &din, &dout);
			if (ret != ADDR_OK)
				return ret;

			surf->u.gfx9.display_dcc_size = dout.dccRamSize;
			surf->u.gfx9.display_dcc_alignment = dout.dccRamBaseAlign;
			surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;
			assert(surf->u.gfx9.display_dcc_size <= surf->dcc_size);

			/* Compute address mapping from non-displayable to displayable DCC. */
			ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin = {};
			addrin.size = sizeof(addrin);
			addrin.colorFlags.color = 1;
			addrin.swizzleMode = din.swizzleMode;
			addrin.resourceType = din.resourceType;
			addrin.bpp = din.bpp;
			addrin.unalignedWidth = din.unalignedWidth;
			addrin.unalignedHeight = din.unalignedHeight;
			addrin.numSlices = 1;
			addrin.numMipLevels = 1;
			addrin.numFrags = 1;

			ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {};
			addrout.size = sizeof(addrout);

			surf->u.gfx9.dcc_retile_num_elements =
				DIV_ROUND_UP(in->width, dout.compressBlkWidth) *
				DIV_ROUND_UP(in->height, dout.compressBlkHeight) * 2;
			/* Align the size to 4 (for the compute shader). */
			surf->u.gfx9.dcc_retile_num_elements =
				align(surf->u.gfx9.dcc_retile_num_elements, 4);

			surf->u.gfx9.dcc_retile_map =
				malloc(surf->u.gfx9.dcc_retile_num_elements * 4);
			if (!surf->u.gfx9.dcc_retile_map)
				return ADDR_OUTOFMEMORY;

			unsigned index = 0;
			surf->u.gfx9.dcc_retile_use_uint16 = true;

			for (unsigned y = 0; y < in->height; y += dout.compressBlkHeight) {
				addrin.y = y;

				for (unsigned x = 0; x < in->width; x += dout.compressBlkWidth) {
					addrin.x = x;

					/* Compute src DCC address */
					addrin.dccKeyFlags.pipeAligned = surf->u.gfx9.dcc.pipe_aligned;
					addrin.dccKeyFlags.rbAligned = surf->u.gfx9.dcc.rb_aligned;
					addrout.addr = 0;

					ret = Addr2ComputeDccAddrFromCoord(addrlib, &addrin, &addrout);
					if (ret != ADDR_OK)
						return ret;

					surf->u.gfx9.dcc_retile_map[index * 2] = addrout.addr;
					if (addrout.addr > USHRT_MAX)
						surf->u.gfx9.dcc_retile_use_uint16 = false;

					/* Compute dst DCC address */
					addrin.dccKeyFlags.pipeAligned = 0;
					addrin.dccKeyFlags.rbAligned = 0;
					addrout.addr = 0;

					ret = Addr2ComputeDccAddrFromCoord(addrlib, &addrin, &addrout);
					if (ret != ADDR_OK)
						return ret;

					surf->u.gfx9.dcc_retile_map[index * 2 + 1] = addrout.addr;
					if (addrout.addr > USHRT_MAX)
						surf->u.gfx9.dcc_retile_use_uint16 = false;

					assert(index * 2 + 1 < surf->u.gfx9.dcc_retile_num_elements);
					index++;
				}
			}
			/* Fill the remaining pairs with the last one (for the compute shader). */
			for (unsigned i = index * 2; i < surf->u.gfx9.dcc_retile_num_elements; i++)
				surf->u.gfx9.dcc_retile_map[i] = surf->u.gfx9.dcc_retile_map[i - 2];
		}
	}

	/* FMASK */

@@ -1503,12 +1603,15 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
	surf->u.gfx9.surf_offset = 0;
	surf->u.gfx9.stencil_offset = 0;
	surf->cmask_size = 0;
	surf->u.gfx9.dcc_retile_use_uint16 = false;
	surf->u.gfx9.dcc_retile_num_elements = 0;
	surf->u.gfx9.dcc_retile_map = NULL;

	/* Calculate texture layout information. */
	r = gfx9_compute_miptree(addrlib, config, surf, compressed,
	r = gfx9_compute_miptree(addrlib, info, config, surf, compressed,
				 &AddrSurfInfoIn);
	if (r)
		return r;
		goto error;

	/* Calculate texture layout information for stencil. */
	if (surf->flags & RADEON_SURF_SBUFFER) {

@@ -1520,14 +1623,14 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
			r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn,
							    false, &AddrSurfInfoIn.swizzleMode);
			if (r)
				return r;
				goto error;
		} else
			AddrSurfInfoIn.flags.depth = 0;

		r = gfx9_compute_miptree(addrlib, config, surf, compressed,
		r = gfx9_compute_miptree(addrlib, info, config, surf, compressed,
					 &AddrSurfInfoIn);
		if (r)
			return r;
			goto error;
	}

	surf->is_linear = surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR;

@@ -1538,7 +1641,7 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
		r = Addr2IsValidDisplaySwizzleMode(addrlib, surf->u.gfx9.surf.swizzle_mode,
						   surf->bpe * 8, &displayable);
		if (r)
			return r;
			goto error;

		/* Display needs unaligned DCC. */
		if (info->use_display_dcc_unaligned &&

@@ -1590,7 +1693,8 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
		 * to facilitate testing.
		 */
		assert(!"rotate micro tile mode is unsupported");
		return ADDR_ERROR;
		r = ADDR_ERROR;
		goto error;

	/* Z = depth. */
	case ADDR_SW_4KB_Z:

@@ -1608,6 +1712,11 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
	}

	return 0;

error:
	free(surf->u.gfx9.dcc_retile_map);
	surf->u.gfx9.dcc_retile_map = NULL;
	return r;
}

int ac_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,

@@ -27,6 +27,7 @@
#define AC_SURFACE_H

#include <stdint.h>
#include <stdbool.h>

#include "amd_family.h"

@@ -149,9 +150,19 @@ struct gfx9_surf_layout {
	/* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
	uint32_t offset[RADEON_SURF_MAX_LEVELS];

	uint16_t display_dcc_pitch_max; /* (mip chain pitch - 1) */

	uint64_t stencil_offset; /* separate stencil */

	/* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0.
	 * The 3D engine doesn't support that layout except for chips with 1 RB.
	 * All other chips must set rb_aligned=1.
	 * A compute shader needs to convert from aligned DCC to unaligned.
	 */
	uint32_t display_dcc_size;
	uint32_t display_dcc_alignment;
	uint16_t display_dcc_pitch_max; /* (mip chain pitch - 1) */
	bool dcc_retile_use_uint16; /* if all values fit into uint16_t */
	uint32_t dcc_retile_num_elements;
	uint32_t *dcc_retile_map;
};

struct radeon_surf {

@@ -496,7 +496,8 @@ static void cik_sdma_copy(struct pipe_context *ctx,
	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE)
		goto fallback;

	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
	/* If src is a buffer and dst is a texture, we are uploading metadata. */
	if (src->target == PIPE_BUFFER) {
		cik_sdma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
		return;
	}

@@ -1318,6 +1318,9 @@ static void si_flush_resource(struct pipe_context *ctx,
		si_blit_decompress_color(sctx, tex, 0, res->last_level,
					 0, util_max_layer(res, 0),
					 tex->dcc_separate_buffer != NULL);

		if (tex->display_dcc_offset)
			si_retile_dcc(sctx, tex);
	}

	/* Always do the analysis even if DCC is disabled at the moment. */

@@ -416,6 +416,84 @@ void si_compute_copy_image(struct si_context *sctx,
	si_compute_internal_end(sctx);
}

void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
{
	struct pipe_context *ctx = &sctx->b;

	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
		       SI_CONTEXT_CS_PARTIAL_FLUSH |
		       si_get_flush_flags(sctx, SI_COHERENCY_CB_META, L2_LRU) |
		       si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_LRU);
	si_emit_cache_flush(sctx);

	/* Save states. */
	void *saved_cs = sctx->cs_shader_state.program;
	struct pipe_image_view saved_img[3] = {};

	for (unsigned i = 0; i < 3; i++) {
		util_copy_image_view(&saved_img[i],
				     &sctx->images[PIPE_SHADER_COMPUTE].views[i]);
	}

	/* Set images. */
	bool use_uint16 = tex->surface.u.gfx9.dcc_retile_use_uint16;
	unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements;
	struct pipe_image_view img[3];

	assert(tex->dcc_retile_map_offset && tex->dcc_retile_map_offset <= UINT_MAX);
	assert(tex->dcc_offset && tex->dcc_offset <= UINT_MAX);
	assert(tex->display_dcc_offset && tex->display_dcc_offset <= UINT_MAX);

	for (unsigned i = 0; i < 3; i++) {
		img[i].resource = &tex->buffer.b.b;
		img[i].access = i == 2 ? PIPE_IMAGE_ACCESS_WRITE : PIPE_IMAGE_ACCESS_READ;
		img[i].shader_access = SI_IMAGE_ACCESS_AS_BUFFER;
	}

	img[0].format = use_uint16 ? PIPE_FORMAT_R16G16B16A16_UINT :
				     PIPE_FORMAT_R32G32B32A32_UINT;
	img[0].u.buf.offset = tex->dcc_retile_map_offset;
	img[0].u.buf.size = num_elements * (use_uint16 ? 2 : 4);

	img[1].format = PIPE_FORMAT_R8_UINT;
	img[1].u.buf.offset = tex->dcc_offset;
	img[1].u.buf.size = tex->surface.dcc_size;

	img[2].format = PIPE_FORMAT_R8_UINT;
	img[2].u.buf.offset = tex->display_dcc_offset;
	img[2].u.buf.size = tex->surface.u.gfx9.display_dcc_size;

	ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 3, img);

	/* Bind the compute shader. */
	if (!sctx->cs_dcc_retile)
		sctx->cs_dcc_retile = si_create_dcc_retile_cs(ctx);
	ctx->bind_compute_state(ctx, sctx->cs_dcc_retile);

	/* Dispatch compute. */
	/* img[0] has 4 channels per element containing 2 pairs of DCC offsets. */
	unsigned num_threads = num_elements / 4;

	struct pipe_grid_info info = {};
	info.block[0] = 64;
	info.block[1] = 1;
	info.block[2] = 1;
	info.grid[0] = DIV_ROUND_UP(num_threads, 64); /* includes the partial block */
	info.grid[1] = 1;
	info.grid[2] = 1;
	info.last_block[0] = num_threads % 64;

	ctx->launch_grid(ctx, &info);

	/* Don't flush caches or wait. The driver will wait at the end of this IB,
	 * and L2 will be flushed by the kernel fence.
	 */

	/* Restore states. */
	ctx->bind_compute_state(ctx, saved_cs);
	ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 3, saved_img);
}

void si_init_compute_blit_functions(struct si_context *sctx)
{
	sctx->b.clear_buffer = si_pipe_clear_buffer;

@@ -210,6 +210,8 @@ static void si_destroy_context(struct pipe_context *context)
		sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target);
	if (sctx->cs_clear_render_target_1d_array)
		sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target_1d_array);
	if (sctx->cs_dcc_retile)
		sctx->b.delete_compute_state(&sctx->b, sctx->cs_dcc_retile);

	if (sctx->blitter)
		util_blitter_destroy(sctx->blitter);

@@ -277,12 +277,22 @@ struct si_texture {
	uint64_t size;
	struct si_texture *flushed_depth_texture;

	/* Colorbuffer compression and fast clear. */
	/* One texture allocation can contain these buffers:
	 * - image (pixel data)
	 * - FMASK buffer (MSAA compression)
	 * - CMASK buffer (MSAA compression and/or legacy fast color clear)
	 * - HTILE buffer (Z/S compression and fast Z/S clear)
	 * - DCC buffer (color compression and new fast color clear)
	 * - displayable DCC buffer (if the DCC buffer is not displayable)
	 * - DCC retile mapping buffer (if the DCC buffer is not displayable)
	 */
	uint64_t fmask_offset;
	uint64_t cmask_offset;
	uint64_t cmask_base_address_reg;
	struct si_resource *cmask_buffer;
	uint64_t dcc_offset; /* 0 = disabled */
	uint64_t display_dcc_offset;
	uint64_t dcc_retile_map_offset;
	unsigned cb_color_info; /* fast clear enable bit */
	unsigned color_clear_value[2];
	unsigned last_msaa_resolve_target_micro_mode;

@@ -827,6 +837,7 @@ struct si_context {
	void *cs_copy_image_1d_array;
	void *cs_clear_render_target;
	void *cs_clear_render_target_1d_array;
	void *cs_dcc_retile;
	struct si_screen *screen;
	struct pipe_debug_callback debug;
	struct ac_llvm_compiler compiler; /* only non-threaded compilation */

@@ -1195,6 +1206,7 @@ void si_compute_clear_render_target(struct pipe_context *ctx,
				    unsigned dstx, unsigned dsty,
				    unsigned width, unsigned height,
				    bool render_condition_enabled);
void si_retile_dcc(struct si_context *sctx, struct si_texture *tex);
void si_init_compute_blit_functions(struct si_context *sctx);

/* si_cp_dma.c */

@@ -1313,6 +1325,7 @@ void *si_create_copy_image_compute_shader(struct pipe_context *ctx);
void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx);
void *si_clear_render_target_shader(struct pipe_context *ctx);
void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx);
void *si_create_dcc_retile_cs(struct pipe_context *ctx);
void *si_create_query_result_cs(struct si_context *sctx);

/* si_test_dma.c */

@@ -226,6 +226,79 @@ void *si_create_dma_compute_shader(struct pipe_context *ctx,
	return cs;
}

/* Create a compute shader that copies DCC from one buffer to another
 * where each DCC buffer has a different layout.
 *
 * image[0]: offset remap table (pairs of <src_offset, dst_offset>),
 *           2 pairs are read
 * image[1]: DCC source buffer, typed r8_uint
 * image[2]: DCC destination buffer, typed r8_uint
 */
void *si_create_dcc_retile_cs(struct pipe_context *ctx)
{
	struct ureg_program *ureg = ureg_create(PIPE_SHADER_COMPUTE);
	if (!ureg)
		return NULL;

	ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 64);
	ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 1);
	ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 1);

	/* Compute the global thread ID (in idx). */
	struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 0);
	struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 0);
	struct ureg_dst idx = ureg_writemask(ureg_DECL_temporary(ureg),
					     TGSI_WRITEMASK_X);
	ureg_UMAD(ureg, idx, blk, ureg_imm1u(ureg, 64), tid);

	/* Load 2 pairs of offsets for DCC load & store. */
	struct ureg_src map = ureg_DECL_image(ureg, 0, TGSI_TEXTURE_BUFFER, 0, false, false);
	struct ureg_dst offsets = ureg_DECL_temporary(ureg);
	struct ureg_src map_load_args[] = {map, ureg_src(idx)};

	ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &offsets, 1, map_load_args, 2,
			 TGSI_MEMORY_RESTRICT, TGSI_TEXTURE_BUFFER, 0);

	struct ureg_src dcc_src = ureg_DECL_image(ureg, 1, TGSI_TEXTURE_BUFFER,
						  0, false, false);
	struct ureg_dst dcc_dst = ureg_dst(ureg_DECL_image(ureg, 2, TGSI_TEXTURE_BUFFER,
							   0, true, false));
	struct ureg_dst dcc_value[2];

	/* Copy DCC values:
	 *   dst[offsets.y] = src[offsets.x];
	 *   dst[offsets.w] = src[offsets.z];
	 */
	for (unsigned i = 0; i < 2; i++) {
		dcc_value[i] = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);

		struct ureg_src load_args[] =
			{dcc_src, ureg_scalar(ureg_src(offsets), TGSI_SWIZZLE_X + i*2)};
		ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &dcc_value[i], 1, load_args, 2,
				 TGSI_MEMORY_RESTRICT, TGSI_TEXTURE_BUFFER, 0);
	}

	dcc_dst = ureg_writemask(dcc_dst, TGSI_WRITEMASK_X);

	for (unsigned i = 0; i < 2; i++) {
		struct ureg_src store_args[] = {
			ureg_scalar(ureg_src(offsets), TGSI_SWIZZLE_Y + i*2),
			ureg_src(dcc_value[i])
		};
		ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &dcc_dst, 1, store_args, 2,
				 TGSI_MEMORY_RESTRICT, TGSI_TEXTURE_BUFFER, 0);
	}
	ureg_END(ureg);

	struct pipe_compute_state state = {};
	state.ir_type = PIPE_SHADER_IR_TGSI;
	state.prog = ureg_get_tokens(ureg, NULL);

	void *cs = ctx->create_compute_state(ctx, &state);
	ureg_destroy(ureg);
	return cs;
}

/* Create the compute shader that is used to collect the results.
 *
 * One compute grid with a single thread is launched for every query result

@@ -431,13 +431,17 @@ static bool si_can_disable_dcc(struct si_texture *tex)
static bool si_texture_discard_dcc(struct si_screen *sscreen,
				   struct si_texture *tex)
{
	if (!si_can_disable_dcc(tex))
	if (!si_can_disable_dcc(tex)) {
		assert(tex->display_dcc_offset == 0);
		return false;
	}

	assert(tex->dcc_separate_buffer == NULL);

	/* Disable DCC. */
	tex->dcc_offset = 0;
	tex->display_dcc_offset = 0;
	tex->dcc_retile_map_offset = 0;

	/* Notify all contexts about the change. */
	p_atomic_inc(&sscreen->dirty_tex_counter);

@@ -625,7 +629,9 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen,
		md.u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;

		if (tex->dcc_offset && !tex->dcc_separate_buffer) {
			uint64_t dcc_offset = tex->dcc_offset;
			uint64_t dcc_offset =
				tex->display_dcc_offset ? tex->display_dcc_offset
							: tex->dcc_offset;

			assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24));
			md.u.gfx9.dcc_offset_256B = dcc_offset >> 8;

@@ -763,6 +769,11 @@ static bool si_has_displayable_dcc(struct si_texture *tex)
	    !tex->surface.u.gfx9.dcc.rb_aligned)
		return true;

	/* This needs an explicit flush (flush_resource). */
	if (sscreen->info.use_display_dcc_with_retile_blit &&
	    tex->display_dcc_offset)
		return true;

	return false;
}

@@ -910,9 +921,13 @@ static boolean si_texture_get_handle(struct pipe_screen* screen,
static void si_texture_destroy(struct pipe_screen *screen,
			       struct pipe_resource *ptex)
{
	struct si_screen *sscreen = (struct si_screen*)screen;
	struct si_texture *tex = (struct si_texture*)ptex;
	struct si_resource *resource = &tex->buffer;

	if (sscreen->info.chip_class >= GFX9)
		free(tex->surface.u.gfx9.dcc_retile_map);

	si_texture_reference(&tex->flushed_depth_texture, NULL);

	if (tex->cmask_buffer != &tex->buffer) {

@@ -1254,10 +1269,32 @@ si_texture_create_object(struct pipe_screen *screen,
		if (tex->surface.dcc_size &&
		    (buf || !(sscreen->debug_flags & DBG(NO_DCC))) &&
		    (sscreen->info.use_display_dcc_unaligned ||
		     sscreen->info.use_display_dcc_with_retile_blit ||
		     !(tex->surface.flags & RADEON_SURF_SCANOUT))) {
			/* Add space for the DCC buffer. */
			tex->dcc_offset = align64(tex->size, tex->surface.dcc_alignment);
			tex->size = tex->dcc_offset + tex->surface.dcc_size;

			if (sscreen->info.chip_class >= GFX9 &&
			    tex->surface.u.gfx9.dcc_retile_num_elements) {
				/* Add space for the displayable DCC buffer. */
				tex->display_dcc_offset =
					align64(tex->size, tex->surface.u.gfx9.display_dcc_alignment);
				tex->size = tex->display_dcc_offset +
					    tex->surface.u.gfx9.display_dcc_size;

				/* Add space for the DCC retile buffer. (16-bit or 32-bit elements) */
				tex->dcc_retile_map_offset =
					align64(tex->size, sscreen->info.tcc_cache_line_size);

				if (tex->surface.u.gfx9.dcc_retile_use_uint16) {
					tex->size = tex->dcc_retile_map_offset +
						    tex->surface.u.gfx9.dcc_retile_num_elements * 2;
				} else {
					tex->size = tex->dcc_retile_map_offset +
						    tex->surface.u.gfx9.dcc_retile_num_elements * 4;
				}
			}
		}
	}

@@ -1353,6 +1390,46 @@ si_texture_create_object(struct pipe_screen *screen,
				}
			}
		}

		/* Upload the DCC retile map. */
		if (tex->dcc_retile_map_offset) {
			/* Use a staging buffer for the upload, because
			 * the buffer backing the texture is unmappable.
			 */
			bool use_uint16 = tex->surface.u.gfx9.dcc_retile_use_uint16;
			unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements;
			struct si_resource *buf =
				si_aligned_buffer_create(screen, 0, PIPE_USAGE_STREAM,
							 num_elements * (use_uint16 ? 2 : 4),
							 sscreen->info.tcc_cache_line_size);
			uint32_t *ui = (uint32_t*)sscreen->ws->buffer_map(buf->buf, NULL,
									  PIPE_TRANSFER_WRITE);
			uint16_t *us = (uint16_t*)ui;

			/* Upload the retile map into a staging buffer. */
			if (use_uint16) {
				for (unsigned i = 0; i < num_elements; i++)
					us[i] = tex->surface.u.gfx9.dcc_retile_map[i];
			} else {
				for (unsigned i = 0; i < num_elements; i++)
					ui[i] = tex->surface.u.gfx9.dcc_retile_map[i];
			}

			/* Copy the staging buffer to the buffer backing the texture. */
			struct si_context *sctx = (struct si_context*)sscreen->aux_context;
			struct pipe_box box;
			u_box_1d(0, buf->b.b.width0, &box);

			assert(tex->dcc_retile_map_offset <= UINT_MAX);
			mtx_lock(&sscreen->aux_context_lock);
			sctx->dma_copy(&sctx->b, &tex->buffer.b.b, 0,
				       tex->dcc_retile_map_offset, 0, 0,
				       &buf->b.b, 0, &box);
			sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
			mtx_unlock(&sscreen->aux_context_lock);

			si_resource_reference(&buf, NULL);
		}
	}

	/* Initialize the CMASK base register value. */

@@ -1381,6 +1458,8 @@ si_texture_create_object(struct pipe_screen *screen,

error:
	FREE(tex);
	if (sscreen->info.chip_class >= GFX9)
		free(surface->u.gfx9.dcc_retile_map);
	return NULL;
}