diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index f923779012b..938e36c4c6a 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -69,6 +69,7 @@ enum radeon_bo_flag RADEON_FLAG_32BIT = (1 << 6), RADEON_FLAG_ENCRYPTED = (1 << 7), RADEON_FLAG_UNCACHED = (1 << 8), /* only gfx9 and newer */ + RADEON_FLAG_DRIVER_INTERNAL = (1 << 9), }; enum radeon_dependency_flag @@ -834,7 +835,8 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo /* Unsupported flags: NO_SUBALLOC, SPARSE. */ if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_UNCACHED | - RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) + RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT | + RADEON_FLAG_DRIVER_INTERNAL)) return -1; switch (domain) { diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 4e77e61ac86..71037726863 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -1102,7 +1102,7 @@ resolve_to_temp: templ.usage = PIPE_USAGE_DEFAULT; templ.flags = SI_RESOURCE_FLAG_FORCE_MSAA_TILING | SI_RESOURCE_FLAG_FORCE_MICRO_TILE_MODE | SI_RESOURCE_FLAG_MICRO_TILE_MODE_SET(src->surface.micro_tile_mode) | - SI_RESOURCE_FLAG_DISABLE_DCC; + SI_RESOURCE_FLAG_DISABLE_DCC | SI_RESOURCE_FLAG_DRIVER_INTERNAL; /* The src and dst microtile modes must be the same. */ if (sctx->chip_class <= GFX8 && src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY) diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index 7d6c4d377d2..2318ecff016 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -182,6 +182,9 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res, if (res->b.b.flags & SI_RESOURCE_FLAG_32BIT) res->flags |= RADEON_FLAG_32BIT; + if (res->b.b.flags & SI_RESOURCE_FLAG_DRIVER_INTERNAL) + res->flags |= RADEON_FLAG_DRIVER_INTERNAL; + /* For higher throughput and lower latency over PCIe assuming sequential access. * Only CP DMA, SDMA, and optimized compute benefit from this. * GFX8 and older don't support RADEON_FLAG_UNCACHED. @@ -479,7 +482,8 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx, struct pipe_resour struct si_resource *staging; assert(!(usage & (TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_THREAD_SAFE))); - staging = si_aligned_buffer_create(ctx->screen, SI_RESOURCE_FLAG_UNCACHED, + staging = si_aligned_buffer_create(ctx->screen, + SI_RESOURCE_FLAG_UNCACHED | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_STAGING, box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT), 256); if (staging) { diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 88d99ae0d62..2830fb3fdc6 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -423,7 +423,9 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_s si_resource_reference(&sctx->compute_scratch_buffer, NULL); sctx->compute_scratch_buffer = - si_aligned_buffer_create(&sctx->screen->b, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, + si_aligned_buffer_create(&sctx->screen->b, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, scratch_needed, sctx->screen->info.pte_fragment_size); if (!sctx->compute_scratch_buffer) diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c index 6cb461fbf53..f5f767beb61 100644 --- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c +++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c @@ -912,7 +912,8 @@ static bool si_initialize_prim_discard_cmdbuf(struct si_context *sctx) unsigned num_oa_counters = VERTEX_COUNTER_GDS_MODE == 2 ? 2 : 0; if (gds_size) { - sctx->gds = ws->buffer_create(ws, gds_size, 4, RADEON_DOMAIN_GDS, 0); + sctx->gds = ws->buffer_create(ws, gds_size, 4, RADEON_DOMAIN_GDS, + RADEON_FLAG_DRIVER_INTERNAL); if (!sctx->gds) return false; @@ -920,7 +921,8 @@ static bool si_initialize_prim_discard_cmdbuf(struct si_context *sctx) } if (num_oa_counters) { assert(gds_size); - sctx->gds_oa = ws->buffer_create(ws, num_oa_counters, 1, RADEON_DOMAIN_OA, 0); + sctx->gds_oa = ws->buffer_create(ws, num_oa_counters, 1, RADEON_DOMAIN_OA, + RADEON_FLAG_DRIVER_INTERNAL); if (!sctx->gds_oa) return false; @@ -935,7 +937,8 @@ static bool si_initialize_prim_discard_cmdbuf(struct si_context *sctx) if (!sctx->index_ring) { sctx->index_ring = si_aligned_buffer_create( - sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, + sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, sctx->index_ring_size_per_ib * 2, sctx->screen->info.pte_fragment_size); if (!sctx->index_ring) return false; diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index e2fc6f30edf..133b4514500 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -265,7 +265,8 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size, uns */ if (!sctx->scratch_buffer || sctx->scratch_buffer->b.b.width0 < scratch_size) { si_resource_reference(&sctx->scratch_buffer, NULL); - sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b, SI_RESOURCE_FLAG_UNMAPPABLE, + sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT, scratch_size, 256); if (!sctx->scratch_buffer) return; diff --git a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c index bc8f89cacab..5df3b764096 100644 --- a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c +++ b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c @@ -150,7 +150,7 @@ void si_init_cp_reg_shadowing(struct si_context *sctx) sctx->screen->debug_flags & DBG(SHADOW_REGS)) { sctx->shadowed_regs = si_aligned_buffer_create(sctx->b.screen, - SI_RESOURCE_FLAG_UNMAPPABLE, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT, SI_SHADOWED_REG_BUFFER_SIZE, 4096); diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 6a50dcae242..6a92e9860e4 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -306,8 +306,8 @@ void si_allocate_gds(struct si_context *sctx) /* 4 streamout GDS counters. * We need 256B (64 dw) of GDS, otherwise streamout hangs. */ - sctx->gds = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, 0); - sctx->gds_oa = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, 0); + sctx->gds = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, RADEON_FLAG_DRIVER_INTERNAL); + sctx->gds_oa = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, RADEON_FLAG_DRIVER_INTERNAL); assert(sctx->gds && sctx->gds_oa); si_add_gds_to_buffer_list(sctx); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 1a429d10d55..a7bef27c475 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -611,7 +611,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign if (sctx->chip_class >= GFX9 || si_compute_prim_discard_enabled(sctx)) { sctx->wait_mem_scratch = - si_aligned_buffer_create(screen, SI_RESOURCE_FLAG_UNMAPPABLE, + si_aligned_buffer_create(screen, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT, 8, sscreen->info.tcc_cache_line_size); if (!sctx->wait_mem_scratch) @@ -622,7 +623,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */ if (sctx->chip_class == GFX7) { sctx->null_const_buf.buffer = - pipe_aligned_buffer_create(screen, SI_RESOURCE_FLAG_32BIT, PIPE_USAGE_DEFAULT, 16, + pipe_aligned_buffer_create(screen, + SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, 16, sctx->screen->info.tcc_cache_line_size); if (!sctx->null_const_buf.buffer) goto fail; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 33ac498a508..de888693d8c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -123,6 +123,7 @@ #define SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(x) \ (((x) >> SI_RESOURCE_FLAG_MICRO_TILE_MODE_SHIFT) & 0x3) #define SI_RESOURCE_FLAG_UNCACHED (PIPE_RESOURCE_FLAG_DRV_PRIV << 12) +#define SI_RESOURCE_FLAG_DRIVER_INTERNAL (PIPE_RESOURCE_FLAG_DRV_PRIV << 13) enum si_clear_code { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 14803b3b53f..8e688cd65af 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -886,7 +886,9 @@ bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader si_resource_reference(&shader->bo, NULL); shader->bo = si_aligned_buffer_create( - &sscreen->b, sscreen->info.cpdma_prefetch_writes_memory ? 0 : SI_RESOURCE_FLAG_READ_ONLY, + &sscreen->b, + (sscreen->info.cpdma_prefetch_writes_memory ? + 0 : SI_RESOURCE_FLAG_READ_ONLY) | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_IMMUTABLE, align(binary.rx_size, SI_CPDMA_ALIGNMENT), 256); if (!shader->bo) return false; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index e75a4015ceb..8e10c9a35d7 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -3400,7 +3400,9 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) if (update_esgs) { pipe_resource_reference(&sctx->esgs_ring, NULL); sctx->esgs_ring = - pipe_aligned_buffer_create(sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, + pipe_aligned_buffer_create(sctx->b.screen, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, esgs_ring_size, sctx->screen->info.pte_fragment_size); if (!sctx->esgs_ring) return false; @@ -3409,7 +3411,9 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) if (update_gsvs) { pipe_resource_reference(&sctx->gsvs_ring, NULL); sctx->gsvs_ring = - pipe_aligned_buffer_create(sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, + pipe_aligned_buffer_create(sctx->b.screen, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, gsvs_ring_size, sctx->screen->info.pte_fragment_size); if (!sctx->gsvs_ring) return false; @@ -3655,7 +3659,9 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) si_resource_reference(&sctx->scratch_buffer, NULL); sctx->scratch_buffer = si_aligned_buffer_create( - &sctx->screen->b, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, scratch_needed_size, + &sctx->screen->b, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, scratch_needed_size, sctx->screen->info.pte_fragment_size); if (!sctx->scratch_buffer) return false; @@ -3690,7 +3696,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx) * receives the high 13 bits. */ sctx->tess_rings = pipe_aligned_buffer_create( - sctx->b.screen, SI_RESOURCE_FLAG_32BIT, PIPE_USAGE_DEFAULT, + sctx->b.screen, SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT, sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, 1 << 19); if (!sctx->tess_rings) return; diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index dda309af49f..b7f85906db0 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -1165,10 +1165,12 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements; unsigned dcc_retile_map_size = num_elements * (use_uint16 ? 2 : 4); - tex->dcc_retile_buffer = si_aligned_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, + tex->dcc_retile_buffer = si_aligned_buffer_create(screen, + SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT, dcc_retile_map_size, sscreen->info.tcc_cache_line_size); - struct si_resource *buf = si_aligned_buffer_create(screen, 0, PIPE_USAGE_STREAM, + struct si_resource *buf = si_aligned_buffer_create(screen, + SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_STREAM, dcc_retile_map_size, sscreen->info.tcc_cache_line_size); void *map = sscreen->ws->buffer_map(buf->buf, NULL, PIPE_MAP_WRITE); @@ -1684,7 +1686,7 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou struct pipe_resource resource; struct si_texture *staging; unsigned bo_usage = usage & PIPE_MAP_READ ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM; - unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR; + unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR | SI_RESOURCE_FLAG_DRIVER_INTERNAL; /* The pixel shader has a bad access pattern for linear textures. * If a pixel shader is used to blit to/from staging, don't disable caches.