radeonsi: introduce SI_RESOURCE_FLAG_INTERNAL / RADEON_FLAG_DRIVER_INTERNAL

Tag allocations as driver internal.
Some of these allocations will need to be doubled to handle TMZ (one secure bo,
one normal bo), but these allocations shouldn't switch the winsys into "the app
is using TMZ" mode.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6049>
This commit is contained in:
Pierre-Eric Pelloux-Prayer
2020-07-23 10:29:14 +02:00
committed by Marge Bot
parent 131412cc4d
commit 5e4aecec93
13 changed files with 47 additions and 21 deletions

View File

@@ -69,6 +69,7 @@ enum radeon_bo_flag
RADEON_FLAG_32BIT = (1 << 6), RADEON_FLAG_32BIT = (1 << 6),
RADEON_FLAG_ENCRYPTED = (1 << 7), RADEON_FLAG_ENCRYPTED = (1 << 7),
RADEON_FLAG_UNCACHED = (1 << 8), /* only gfx9 and newer */ RADEON_FLAG_UNCACHED = (1 << 8), /* only gfx9 and newer */
RADEON_FLAG_DRIVER_INTERNAL = (1 << 9),
}; };
enum radeon_dependency_flag enum radeon_dependency_flag
@@ -834,7 +835,8 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
/* Unsupported flags: NO_SUBALLOC, SPARSE. */ /* Unsupported flags: NO_SUBALLOC, SPARSE. */
if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_UNCACHED | if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_UNCACHED |
RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT |
RADEON_FLAG_DRIVER_INTERNAL))
return -1; return -1;
switch (domain) { switch (domain) {

View File

@@ -1102,7 +1102,7 @@ resolve_to_temp:
templ.usage = PIPE_USAGE_DEFAULT; templ.usage = PIPE_USAGE_DEFAULT;
templ.flags = SI_RESOURCE_FLAG_FORCE_MSAA_TILING | SI_RESOURCE_FLAG_FORCE_MICRO_TILE_MODE | templ.flags = SI_RESOURCE_FLAG_FORCE_MSAA_TILING | SI_RESOURCE_FLAG_FORCE_MICRO_TILE_MODE |
SI_RESOURCE_FLAG_MICRO_TILE_MODE_SET(src->surface.micro_tile_mode) | SI_RESOURCE_FLAG_MICRO_TILE_MODE_SET(src->surface.micro_tile_mode) |
SI_RESOURCE_FLAG_DISABLE_DCC; SI_RESOURCE_FLAG_DISABLE_DCC | SI_RESOURCE_FLAG_DRIVER_INTERNAL;
/* The src and dst microtile modes must be the same. */ /* The src and dst microtile modes must be the same. */
if (sctx->chip_class <= GFX8 && src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY) if (sctx->chip_class <= GFX8 && src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)

View File

@@ -182,6 +182,9 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
if (res->b.b.flags & SI_RESOURCE_FLAG_32BIT) if (res->b.b.flags & SI_RESOURCE_FLAG_32BIT)
res->flags |= RADEON_FLAG_32BIT; res->flags |= RADEON_FLAG_32BIT;
if (res->b.b.flags & SI_RESOURCE_FLAG_DRIVER_INTERNAL)
res->flags |= RADEON_FLAG_DRIVER_INTERNAL;
/* For higher throughput and lower latency over PCIe assuming sequential access. /* For higher throughput and lower latency over PCIe assuming sequential access.
* Only CP DMA, SDMA, and optimized compute benefit from this. * Only CP DMA, SDMA, and optimized compute benefit from this.
* GFX8 and older don't support RADEON_FLAG_UNCACHED. * GFX8 and older don't support RADEON_FLAG_UNCACHED.
@@ -479,7 +482,8 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx, struct pipe_resour
struct si_resource *staging; struct si_resource *staging;
assert(!(usage & (TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_THREAD_SAFE))); assert(!(usage & (TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_THREAD_SAFE)));
staging = si_aligned_buffer_create(ctx->screen, SI_RESOURCE_FLAG_UNCACHED, staging = si_aligned_buffer_create(ctx->screen,
SI_RESOURCE_FLAG_UNCACHED | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_STAGING, PIPE_USAGE_STAGING,
box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT), 256); box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT), 256);
if (staging) { if (staging) {

View File

@@ -423,7 +423,9 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_s
si_resource_reference(&sctx->compute_scratch_buffer, NULL); si_resource_reference(&sctx->compute_scratch_buffer, NULL);
sctx->compute_scratch_buffer = sctx->compute_scratch_buffer =
si_aligned_buffer_create(&sctx->screen->b, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, si_aligned_buffer_create(&sctx->screen->b,
SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT,
scratch_needed, sctx->screen->info.pte_fragment_size); scratch_needed, sctx->screen->info.pte_fragment_size);
if (!sctx->compute_scratch_buffer) if (!sctx->compute_scratch_buffer)

View File

@@ -912,7 +912,8 @@ static bool si_initialize_prim_discard_cmdbuf(struct si_context *sctx)
unsigned num_oa_counters = VERTEX_COUNTER_GDS_MODE == 2 ? 2 : 0; unsigned num_oa_counters = VERTEX_COUNTER_GDS_MODE == 2 ? 2 : 0;
if (gds_size) { if (gds_size) {
sctx->gds = ws->buffer_create(ws, gds_size, 4, RADEON_DOMAIN_GDS, 0); sctx->gds = ws->buffer_create(ws, gds_size, 4, RADEON_DOMAIN_GDS,
RADEON_FLAG_DRIVER_INTERNAL);
if (!sctx->gds) if (!sctx->gds)
return false; return false;
@@ -920,7 +921,8 @@ static bool si_initialize_prim_discard_cmdbuf(struct si_context *sctx)
} }
if (num_oa_counters) { if (num_oa_counters) {
assert(gds_size); assert(gds_size);
sctx->gds_oa = ws->buffer_create(ws, num_oa_counters, 1, RADEON_DOMAIN_OA, 0); sctx->gds_oa = ws->buffer_create(ws, num_oa_counters, 1, RADEON_DOMAIN_OA,
RADEON_FLAG_DRIVER_INTERNAL);
if (!sctx->gds_oa) if (!sctx->gds_oa)
return false; return false;
@@ -935,7 +937,8 @@ static bool si_initialize_prim_discard_cmdbuf(struct si_context *sctx)
if (!sctx->index_ring) { if (!sctx->index_ring) {
sctx->index_ring = si_aligned_buffer_create( sctx->index_ring = si_aligned_buffer_create(
sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT,
sctx->index_ring_size_per_ib * 2, sctx->screen->info.pte_fragment_size); sctx->index_ring_size_per_ib * 2, sctx->screen->info.pte_fragment_size);
if (!sctx->index_ring) if (!sctx->index_ring)
return false; return false;

View File

@@ -265,7 +265,8 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size, uns
*/ */
if (!sctx->scratch_buffer || sctx->scratch_buffer->b.b.width0 < scratch_size) { if (!sctx->scratch_buffer || sctx->scratch_buffer->b.b.width0 < scratch_size) {
si_resource_reference(&sctx->scratch_buffer, NULL); si_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b, SI_RESOURCE_FLAG_UNMAPPABLE, sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b,
SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT, scratch_size, 256); PIPE_USAGE_DEFAULT, scratch_size, 256);
if (!sctx->scratch_buffer) if (!sctx->scratch_buffer)
return; return;

View File

@@ -150,7 +150,7 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
sctx->screen->debug_flags & DBG(SHADOW_REGS)) { sctx->screen->debug_flags & DBG(SHADOW_REGS)) {
sctx->shadowed_regs = sctx->shadowed_regs =
si_aligned_buffer_create(sctx->b.screen, si_aligned_buffer_create(sctx->b.screen,
SI_RESOURCE_FLAG_UNMAPPABLE, SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT, PIPE_USAGE_DEFAULT,
SI_SHADOWED_REG_BUFFER_SIZE, SI_SHADOWED_REG_BUFFER_SIZE,
4096); 4096);

View File

@@ -306,8 +306,8 @@ void si_allocate_gds(struct si_context *sctx)
/* 4 streamout GDS counters. /* 4 streamout GDS counters.
* We need 256B (64 dw) of GDS, otherwise streamout hangs. * We need 256B (64 dw) of GDS, otherwise streamout hangs.
*/ */
sctx->gds = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, 0); sctx->gds = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, RADEON_FLAG_DRIVER_INTERNAL);
sctx->gds_oa = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, 0); sctx->gds_oa = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, RADEON_FLAG_DRIVER_INTERNAL);
assert(sctx->gds && sctx->gds_oa); assert(sctx->gds && sctx->gds_oa);
si_add_gds_to_buffer_list(sctx); si_add_gds_to_buffer_list(sctx);

View File

@@ -611,7 +611,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
if (sctx->chip_class >= GFX9 || si_compute_prim_discard_enabled(sctx)) { if (sctx->chip_class >= GFX9 || si_compute_prim_discard_enabled(sctx)) {
sctx->wait_mem_scratch = sctx->wait_mem_scratch =
si_aligned_buffer_create(screen, SI_RESOURCE_FLAG_UNMAPPABLE, si_aligned_buffer_create(screen,
SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT, 8, PIPE_USAGE_DEFAULT, 8,
sscreen->info.tcc_cache_line_size); sscreen->info.tcc_cache_line_size);
if (!sctx->wait_mem_scratch) if (!sctx->wait_mem_scratch)
@@ -622,7 +623,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
* if NUM_RECORDS == 0). We need to use a dummy buffer instead. */ * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
if (sctx->chip_class == GFX7) { if (sctx->chip_class == GFX7) {
sctx->null_const_buf.buffer = sctx->null_const_buf.buffer =
pipe_aligned_buffer_create(screen, SI_RESOURCE_FLAG_32BIT, PIPE_USAGE_DEFAULT, 16, pipe_aligned_buffer_create(screen,
SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT, 16,
sctx->screen->info.tcc_cache_line_size); sctx->screen->info.tcc_cache_line_size);
if (!sctx->null_const_buf.buffer) if (!sctx->null_const_buf.buffer)
goto fail; goto fail;

View File

@@ -123,6 +123,7 @@
#define SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(x) \ #define SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(x) \
(((x) >> SI_RESOURCE_FLAG_MICRO_TILE_MODE_SHIFT) & 0x3) (((x) >> SI_RESOURCE_FLAG_MICRO_TILE_MODE_SHIFT) & 0x3)
#define SI_RESOURCE_FLAG_UNCACHED (PIPE_RESOURCE_FLAG_DRV_PRIV << 12) #define SI_RESOURCE_FLAG_UNCACHED (PIPE_RESOURCE_FLAG_DRV_PRIV << 12)
#define SI_RESOURCE_FLAG_DRIVER_INTERNAL (PIPE_RESOURCE_FLAG_DRV_PRIV << 13)
enum si_clear_code enum si_clear_code
{ {

View File

@@ -886,7 +886,9 @@ bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader
si_resource_reference(&shader->bo, NULL); si_resource_reference(&shader->bo, NULL);
shader->bo = si_aligned_buffer_create( shader->bo = si_aligned_buffer_create(
&sscreen->b, sscreen->info.cpdma_prefetch_writes_memory ? 0 : SI_RESOURCE_FLAG_READ_ONLY, &sscreen->b,
(sscreen->info.cpdma_prefetch_writes_memory ?
0 : SI_RESOURCE_FLAG_READ_ONLY) | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_IMMUTABLE, align(binary.rx_size, SI_CPDMA_ALIGNMENT), 256); PIPE_USAGE_IMMUTABLE, align(binary.rx_size, SI_CPDMA_ALIGNMENT), 256);
if (!shader->bo) if (!shader->bo)
return false; return false;

View File

@@ -3400,7 +3400,9 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
if (update_esgs) { if (update_esgs) {
pipe_resource_reference(&sctx->esgs_ring, NULL); pipe_resource_reference(&sctx->esgs_ring, NULL);
sctx->esgs_ring = sctx->esgs_ring =
pipe_aligned_buffer_create(sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, pipe_aligned_buffer_create(sctx->b.screen,
SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT,
esgs_ring_size, sctx->screen->info.pte_fragment_size); esgs_ring_size, sctx->screen->info.pte_fragment_size);
if (!sctx->esgs_ring) if (!sctx->esgs_ring)
return false; return false;
@@ -3409,7 +3411,9 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
if (update_gsvs) { if (update_gsvs) {
pipe_resource_reference(&sctx->gsvs_ring, NULL); pipe_resource_reference(&sctx->gsvs_ring, NULL);
sctx->gsvs_ring = sctx->gsvs_ring =
pipe_aligned_buffer_create(sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, pipe_aligned_buffer_create(sctx->b.screen,
SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT,
gsvs_ring_size, sctx->screen->info.pte_fragment_size); gsvs_ring_size, sctx->screen->info.pte_fragment_size);
if (!sctx->gsvs_ring) if (!sctx->gsvs_ring)
return false; return false;
@@ -3655,7 +3659,9 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
si_resource_reference(&sctx->scratch_buffer, NULL); si_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer = si_aligned_buffer_create( sctx->scratch_buffer = si_aligned_buffer_create(
&sctx->screen->b, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, scratch_needed_size, &sctx->screen->b,
SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT, scratch_needed_size,
sctx->screen->info.pte_fragment_size); sctx->screen->info.pte_fragment_size);
if (!sctx->scratch_buffer) if (!sctx->scratch_buffer)
return false; return false;
@@ -3690,7 +3696,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
* receives the high 13 bits. * receives the high 13 bits.
*/ */
sctx->tess_rings = pipe_aligned_buffer_create( sctx->tess_rings = pipe_aligned_buffer_create(
sctx->b.screen, SI_RESOURCE_FLAG_32BIT, PIPE_USAGE_DEFAULT, sctx->b.screen, SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT,
sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, 1 << 19); sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, 1 << 19);
if (!sctx->tess_rings) if (!sctx->tess_rings)
return; return;

View File

@@ -1165,10 +1165,12 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements; unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements;
unsigned dcc_retile_map_size = num_elements * (use_uint16 ? 2 : 4); unsigned dcc_retile_map_size = num_elements * (use_uint16 ? 2 : 4);
tex->dcc_retile_buffer = si_aligned_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, tex->dcc_retile_buffer = si_aligned_buffer_create(screen,
SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT,
dcc_retile_map_size, dcc_retile_map_size,
sscreen->info.tcc_cache_line_size); sscreen->info.tcc_cache_line_size);
struct si_resource *buf = si_aligned_buffer_create(screen, 0, PIPE_USAGE_STREAM, struct si_resource *buf = si_aligned_buffer_create(screen,
SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_STREAM,
dcc_retile_map_size, dcc_retile_map_size,
sscreen->info.tcc_cache_line_size); sscreen->info.tcc_cache_line_size);
void *map = sscreen->ws->buffer_map(buf->buf, NULL, PIPE_MAP_WRITE); void *map = sscreen->ws->buffer_map(buf->buf, NULL, PIPE_MAP_WRITE);
@@ -1684,7 +1686,7 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou
struct pipe_resource resource; struct pipe_resource resource;
struct si_texture *staging; struct si_texture *staging;
unsigned bo_usage = usage & PIPE_MAP_READ ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM; unsigned bo_usage = usage & PIPE_MAP_READ ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR; unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR | SI_RESOURCE_FLAG_DRIVER_INTERNAL;
/* The pixel shader has a bad access pattern for linear textures. /* The pixel shader has a bad access pattern for linear textures.
* If a pixel shader is used to blit to/from staging, don't disable caches. * If a pixel shader is used to blit to/from staging, don't disable caches.