gallium/radeon: change the BO priority definitions to bits
This is for the next microoptimization.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13478>
This commit is contained in:
@@ -70,7 +70,7 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct
|
|||||||
struct r600_ring *ring,
|
struct r600_ring *ring,
|
||||||
struct r600_resource *rbo,
|
struct r600_resource *rbo,
|
||||||
enum radeon_bo_usage usage,
|
enum radeon_bo_usage usage,
|
||||||
enum radeon_bo_priority priority)
|
unsigned priority)
|
||||||
{
|
{
|
||||||
assert(usage);
|
assert(usage);
|
||||||
return rctx->ws->cs_add_buffer(
|
return rctx->ws->cs_add_buffer(
|
||||||
@@ -101,7 +101,7 @@ radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx,
|
|||||||
struct r600_ring *ring,
|
struct r600_ring *ring,
|
||||||
struct r600_resource *rbo,
|
struct r600_resource *rbo,
|
||||||
enum radeon_bo_usage usage,
|
enum radeon_bo_usage usage,
|
||||||
enum radeon_bo_priority priority,
|
unsigned priority,
|
||||||
bool check_mem)
|
bool check_mem)
|
||||||
{
|
{
|
||||||
if (check_mem &&
|
if (check_mem &&
|
||||||
@@ -116,7 +116,7 @@ radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx,
|
|||||||
static inline void r600_emit_reloc(struct r600_common_context *rctx,
|
static inline void r600_emit_reloc(struct r600_common_context *rctx,
|
||||||
struct r600_ring *ring, struct r600_resource *rbo,
|
struct r600_ring *ring, struct r600_resource *rbo,
|
||||||
enum radeon_bo_usage usage,
|
enum radeon_bo_usage usage,
|
||||||
enum radeon_bo_priority priority)
|
unsigned priority)
|
||||||
{
|
{
|
||||||
struct radeon_cmdbuf *cs = &ring->cs;
|
struct radeon_cmdbuf *cs = &ring->cs;
|
||||||
bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_has_virtual_memory;
|
bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_has_virtual_memory;
|
||||||
|
@@ -900,7 +900,7 @@ static inline unsigned r600_wavefront_size(enum radeon_family family)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline enum radeon_bo_priority
|
static inline unsigned
|
||||||
r600_get_sampler_view_priority(struct r600_resource *res)
|
r600_get_sampler_view_priority(struct r600_resource *res)
|
||||||
{
|
{
|
||||||
if (res->b.b.target == PIPE_BUFFER)
|
if (res->b.b.target == PIPE_BUFFER)
|
||||||
|
@@ -142,53 +142,49 @@ enum radeon_value_id
|
|||||||
RADEON_CS_THREAD_TIME,
|
RADEON_CS_THREAD_TIME,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum radeon_bo_priority
|
|
||||||
{
|
|
||||||
/* Each group of two has the same priority. */
|
/* Each group of two has the same priority. */
|
||||||
RADEON_PRIO_FENCE = 0,
|
#define RADEON_PRIO_FENCE (1 << 0)
|
||||||
RADEON_PRIO_TRACE,
|
#define RADEON_PRIO_TRACE (1 << 1)
|
||||||
|
|
||||||
RADEON_PRIO_SO_FILLED_SIZE = 2,
|
#define RADEON_PRIO_SO_FILLED_SIZE (1 << 2)
|
||||||
RADEON_PRIO_QUERY,
|
#define RADEON_PRIO_QUERY (1 << 3)
|
||||||
|
|
||||||
RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */
|
#define RADEON_PRIO_IB1 (1 << 4) /* main IB submitted to the kernel */
|
||||||
RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */
|
#define RADEON_PRIO_IB2 (1 << 5) /* IB executed with INDIRECT_BUFFER */
|
||||||
|
|
||||||
RADEON_PRIO_DRAW_INDIRECT = 6,
|
#define RADEON_PRIO_DRAW_INDIRECT (1 << 6)
|
||||||
RADEON_PRIO_INDEX_BUFFER,
|
#define RADEON_PRIO_INDEX_BUFFER (1 << 7)
|
||||||
|
|
||||||
RADEON_PRIO_CP_DMA = 8,
|
#define RADEON_PRIO_CP_DMA (1 << 8)
|
||||||
RADEON_PRIO_BORDER_COLORS,
|
#define RADEON_PRIO_BORDER_COLORS (1 << 9)
|
||||||
|
|
||||||
RADEON_PRIO_CONST_BUFFER = 10,
|
#define RADEON_PRIO_CONST_BUFFER (1 << 10)
|
||||||
RADEON_PRIO_DESCRIPTORS,
|
#define RADEON_PRIO_DESCRIPTORS (1 << 11)
|
||||||
|
|
||||||
RADEON_PRIO_SAMPLER_BUFFER = 12,
|
#define RADEON_PRIO_SAMPLER_BUFFER (1 << 12)
|
||||||
RADEON_PRIO_VERTEX_BUFFER,
|
#define RADEON_PRIO_VERTEX_BUFFER (1 << 13)
|
||||||
|
|
||||||
RADEON_PRIO_SHADER_RW_BUFFER = 14,
|
#define RADEON_PRIO_SHADER_RW_BUFFER (1 << 14)
|
||||||
RADEON_PRIO_COMPUTE_GLOBAL,
|
#define RADEON_PRIO_COMPUTE_GLOBAL (1 << 15)
|
||||||
|
|
||||||
RADEON_PRIO_SAMPLER_TEXTURE = 16,
|
#define RADEON_PRIO_SAMPLER_TEXTURE (1 << 16)
|
||||||
RADEON_PRIO_SHADER_RW_IMAGE,
|
#define RADEON_PRIO_SHADER_RW_IMAGE (1 << 17)
|
||||||
|
|
||||||
RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 18,
|
#define RADEON_PRIO_SAMPLER_TEXTURE_MSAA (1 << 18)
|
||||||
RADEON_PRIO_COLOR_BUFFER,
|
#define RADEON_PRIO_COLOR_BUFFER (1 << 19)
|
||||||
|
|
||||||
RADEON_PRIO_DEPTH_BUFFER = 20,
|
#define RADEON_PRIO_DEPTH_BUFFER (1 << 20)
|
||||||
|
|
||||||
RADEON_PRIO_COLOR_BUFFER_MSAA = 22,
|
#define RADEON_PRIO_COLOR_BUFFER_MSAA (1 << 22)
|
||||||
|
|
||||||
RADEON_PRIO_DEPTH_BUFFER_MSAA = 24,
|
#define RADEON_PRIO_DEPTH_BUFFER_MSAA (1 << 24)
|
||||||
|
|
||||||
RADEON_PRIO_SEPARATE_META = 26,
|
#define RADEON_PRIO_SEPARATE_META (1 << 26)
|
||||||
RADEON_PRIO_SHADER_BINARY, /* the hw can't hide instruction cache misses */
|
#define RADEON_PRIO_SHADER_BINARY (1 << 27) /* the hw can't hide instruction cache misses */
|
||||||
|
|
||||||
RADEON_PRIO_SHADER_RINGS = 28,
|
#define RADEON_PRIO_SHADER_RINGS (1 << 28)
|
||||||
|
|
||||||
RADEON_PRIO_SCRATCH_BUFFER = 30,
|
#define RADEON_PRIO_SCRATCH_BUFFER (1 << 30)
|
||||||
/* 31 is the maximum value */
|
|
||||||
};
|
|
||||||
|
|
||||||
struct winsys_handle;
|
struct winsys_handle;
|
||||||
struct radeon_winsys_ctx;
|
struct radeon_winsys_ctx;
|
||||||
@@ -539,7 +535,7 @@ struct radeon_winsys {
|
|||||||
*/
|
*/
|
||||||
unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf,
|
unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf,
|
||||||
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
|
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
|
||||||
enum radeon_bo_priority priority);
|
unsigned priority);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the index of an already-added buffer.
|
* Return the index of an already-added buffer.
|
||||||
|
@@ -498,42 +498,39 @@ void si_log_hw_flush(struct si_context *sctx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *priority_to_string(enum radeon_bo_priority priority)
|
static const char *priority_to_string(unsigned priority)
|
||||||
{
|
{
|
||||||
#define ITEM(x) [RADEON_PRIO_##x] = #x
|
#define ITEM(x) if (priority == RADEON_PRIO_##x) return #x
|
||||||
static const char *table[64] = {
|
ITEM(FENCE);
|
||||||
ITEM(FENCE),
|
ITEM(TRACE);
|
||||||
ITEM(TRACE),
|
ITEM(SO_FILLED_SIZE);
|
||||||
ITEM(SO_FILLED_SIZE),
|
ITEM(QUERY);
|
||||||
ITEM(QUERY),
|
ITEM(IB1);
|
||||||
ITEM(IB1),
|
ITEM(IB2);
|
||||||
ITEM(IB2),
|
ITEM(DRAW_INDIRECT);
|
||||||
ITEM(DRAW_INDIRECT),
|
ITEM(INDEX_BUFFER);
|
||||||
ITEM(INDEX_BUFFER),
|
ITEM(CP_DMA);
|
||||||
ITEM(CP_DMA),
|
ITEM(CONST_BUFFER);
|
||||||
ITEM(CONST_BUFFER),
|
ITEM(DESCRIPTORS);
|
||||||
ITEM(DESCRIPTORS),
|
ITEM(BORDER_COLORS);
|
||||||
ITEM(BORDER_COLORS),
|
ITEM(SAMPLER_BUFFER);
|
||||||
ITEM(SAMPLER_BUFFER),
|
ITEM(VERTEX_BUFFER);
|
||||||
ITEM(VERTEX_BUFFER),
|
ITEM(SHADER_RW_BUFFER);
|
||||||
ITEM(SHADER_RW_BUFFER),
|
ITEM(COMPUTE_GLOBAL);
|
||||||
ITEM(COMPUTE_GLOBAL),
|
ITEM(SAMPLER_TEXTURE);
|
||||||
ITEM(SAMPLER_TEXTURE),
|
ITEM(SHADER_RW_IMAGE);
|
||||||
ITEM(SHADER_RW_IMAGE),
|
ITEM(SAMPLER_TEXTURE_MSAA);
|
||||||
ITEM(SAMPLER_TEXTURE_MSAA),
|
ITEM(COLOR_BUFFER);
|
||||||
ITEM(COLOR_BUFFER),
|
ITEM(DEPTH_BUFFER);
|
||||||
ITEM(DEPTH_BUFFER),
|
ITEM(COLOR_BUFFER_MSAA);
|
||||||
ITEM(COLOR_BUFFER_MSAA),
|
ITEM(DEPTH_BUFFER_MSAA);
|
||||||
ITEM(DEPTH_BUFFER_MSAA),
|
ITEM(SEPARATE_META);
|
||||||
ITEM(SEPARATE_META),
|
ITEM(SHADER_BINARY);
|
||||||
ITEM(SHADER_BINARY),
|
ITEM(SHADER_RINGS);
|
||||||
ITEM(SHADER_RINGS),
|
ITEM(SCRATCH_BUFFER);
|
||||||
ITEM(SCRATCH_BUFFER),
|
|
||||||
};
|
|
||||||
#undef ITEM
|
#undef ITEM
|
||||||
|
|
||||||
assert(priority < ARRAY_SIZE(table));
|
return "";
|
||||||
return table[priority];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bo_list_compare_va(const struct radeon_bo_list_item *a,
|
static int bo_list_compare_va(const struct radeon_bo_list_item *a,
|
||||||
@@ -582,7 +579,7 @@ static void si_dump_bo_list(struct si_context *sctx, const struct radeon_saved_c
|
|||||||
if (!(saved->bo_list[i].priority_usage & (1u << j)))
|
if (!(saved->bo_list[i].priority_usage & (1u << j)))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
|
fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(1u << j));
|
||||||
hit = true;
|
hit = true;
|
||||||
}
|
}
|
||||||
fprintf(f, "\n");
|
fprintf(f, "\n");
|
||||||
|
@@ -188,7 +188,7 @@ si_add_descriptors_to_bo_list(struct si_context *sctx, struct si_descriptors *de
|
|||||||
|
|
||||||
/* SAMPLER VIEWS */
|
/* SAMPLER VIEWS */
|
||||||
|
|
||||||
static inline enum radeon_bo_priority si_get_sampler_view_priority(struct si_resource *res)
|
static inline unsigned si_get_sampler_view_priority(struct si_resource *res)
|
||||||
{
|
{
|
||||||
if (res->b.b.target == PIPE_BUFFER)
|
if (res->b.b.target == PIPE_BUFFER)
|
||||||
return RADEON_PRIO_SAMPLER_BUFFER;
|
return RADEON_PRIO_SAMPLER_BUFFER;
|
||||||
@@ -219,7 +219,7 @@ static void si_sampler_view_add_buffer(struct si_context *sctx, struct pipe_reso
|
|||||||
bool check_mem)
|
bool check_mem)
|
||||||
{
|
{
|
||||||
struct si_texture *tex = (struct si_texture *)resource;
|
struct si_texture *tex = (struct si_texture *)resource;
|
||||||
enum radeon_bo_priority priority;
|
unsigned priority;
|
||||||
|
|
||||||
if (!resource)
|
if (!resource)
|
||||||
return;
|
return;
|
||||||
@@ -1027,8 +1027,8 @@ static void si_init_buffer_resources(struct si_context *sctx,
|
|||||||
struct si_buffer_resources *buffers,
|
struct si_buffer_resources *buffers,
|
||||||
struct si_descriptors *descs, unsigned num_buffers,
|
struct si_descriptors *descs, unsigned num_buffers,
|
||||||
short shader_userdata_rel_index,
|
short shader_userdata_rel_index,
|
||||||
enum radeon_bo_priority priority,
|
unsigned priority,
|
||||||
enum radeon_bo_priority priority_constbuf)
|
unsigned priority_constbuf)
|
||||||
{
|
{
|
||||||
buffers->priority = priority;
|
buffers->priority = priority;
|
||||||
buffers->priority_constbuf = priority_constbuf;
|
buffers->priority_constbuf = priority_constbuf;
|
||||||
@@ -1331,7 +1331,7 @@ void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot
|
|||||||
static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resources *buffers,
|
static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resources *buffers,
|
||||||
unsigned descriptors_idx, uint slot,
|
unsigned descriptors_idx, uint slot,
|
||||||
const struct pipe_shader_buffer *sbuffer, bool writable,
|
const struct pipe_shader_buffer *sbuffer, bool writable,
|
||||||
enum radeon_bo_priority priority)
|
unsigned priority)
|
||||||
{
|
{
|
||||||
struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
|
struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
|
||||||
uint32_t *desc = descs->list + slot * 4;
|
uint32_t *desc = descs->list + slot * 4;
|
||||||
@@ -1608,7 +1608,7 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx)
|
|||||||
*/
|
*/
|
||||||
static bool si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers,
|
static bool si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers,
|
||||||
unsigned descriptors_idx, uint64_t slot_mask,
|
unsigned descriptors_idx, uint64_t slot_mask,
|
||||||
struct pipe_resource *buf, enum radeon_bo_priority priority)
|
struct pipe_resource *buf, unsigned priority)
|
||||||
{
|
{
|
||||||
struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
|
struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
|
||||||
bool noop = true;
|
bool noop = true;
|
||||||
|
@@ -1941,7 +1941,7 @@ static inline void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_dra
|
|||||||
*/
|
*/
|
||||||
static inline void radeon_add_to_buffer_list(struct si_context *sctx, struct radeon_cmdbuf *cs,
|
static inline void radeon_add_to_buffer_list(struct si_context *sctx, struct radeon_cmdbuf *cs,
|
||||||
struct si_resource *bo, enum radeon_bo_usage usage,
|
struct si_resource *bo, enum radeon_bo_usage usage,
|
||||||
enum radeon_bo_priority priority)
|
unsigned priority)
|
||||||
{
|
{
|
||||||
assert(usage);
|
assert(usage);
|
||||||
sctx->ws->cs_add_buffer(cs, bo->buf, (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),
|
sctx->ws->cs_add_buffer(cs, bo->buf, (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),
|
||||||
@@ -1966,7 +1966,7 @@ static inline void radeon_add_to_buffer_list(struct si_context *sctx, struct rad
|
|||||||
static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sctx,
|
static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sctx,
|
||||||
struct si_resource *bo,
|
struct si_resource *bo,
|
||||||
enum radeon_bo_usage usage,
|
enum radeon_bo_usage usage,
|
||||||
enum radeon_bo_priority priority,
|
unsigned priority,
|
||||||
bool check_mem)
|
bool check_mem)
|
||||||
{
|
{
|
||||||
if (check_mem &&
|
if (check_mem &&
|
||||||
|
@@ -454,8 +454,8 @@ struct si_buffer_resources {
|
|||||||
struct pipe_resource **buffers; /* this has num_buffers elements */
|
struct pipe_resource **buffers; /* this has num_buffers elements */
|
||||||
unsigned *offsets; /* this has num_buffers elements */
|
unsigned *offsets; /* this has num_buffers elements */
|
||||||
|
|
||||||
enum radeon_bo_priority priority : 6;
|
unsigned priority;
|
||||||
enum radeon_bo_priority priority_constbuf : 6;
|
unsigned priority_constbuf;
|
||||||
|
|
||||||
/* The i-th bit is set if that element is enabled (non-NULL resource). */
|
/* The i-th bit is set if that element is enabled (non-NULL resource). */
|
||||||
uint64_t enabled_mask;
|
uint64_t enabled_mask;
|
||||||
|
@@ -645,7 +645,7 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
|
|||||||
struct pb_buffer *buf,
|
struct pb_buffer *buf,
|
||||||
enum radeon_bo_usage usage,
|
enum radeon_bo_usage usage,
|
||||||
enum radeon_bo_domain domains,
|
enum radeon_bo_domain domains,
|
||||||
enum radeon_bo_priority priority)
|
unsigned priority)
|
||||||
{
|
{
|
||||||
/* Don't use the "domains" parameter. Amdgpu doesn't support changing
|
/* Don't use the "domains" parameter. Amdgpu doesn't support changing
|
||||||
* the buffer placement during command submission.
|
* the buffer placement during command submission.
|
||||||
@@ -662,7 +662,7 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
|
|||||||
*/
|
*/
|
||||||
if (bo == cs->last_added_bo &&
|
if (bo == cs->last_added_bo &&
|
||||||
(usage & cs->last_added_bo_usage) == usage &&
|
(usage & cs->last_added_bo_usage) == usage &&
|
||||||
(1u << priority) & cs->last_added_bo_priority_usage)
|
priority & cs->last_added_bo_priority_usage)
|
||||||
return cs->last_added_bo_index;
|
return cs->last_added_bo_index;
|
||||||
|
|
||||||
if (!(bo->base.usage & RADEON_FLAG_SPARSE)) {
|
if (!(bo->base.usage & RADEON_FLAG_SPARSE)) {
|
||||||
@@ -691,7 +691,7 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
|
|||||||
buffer = &cs->sparse_buffers[index];
|
buffer = &cs->sparse_buffers[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer->u.real.priority_usage |= 1u << priority;
|
buffer->u.real.priority_usage |= priority;
|
||||||
buffer->usage |= usage;
|
buffer->usage |= usage;
|
||||||
|
|
||||||
cs->last_added_bo = bo;
|
cs->last_added_bo = bo;
|
||||||
|
@@ -362,7 +362,7 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs,
|
|||||||
struct pb_buffer *buf,
|
struct pb_buffer *buf,
|
||||||
enum radeon_bo_usage usage,
|
enum radeon_bo_usage usage,
|
||||||
enum radeon_bo_domain domains,
|
enum radeon_bo_domain domains,
|
||||||
enum radeon_bo_priority priority)
|
unsigned priority)
|
||||||
{
|
{
|
||||||
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
|
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
|
||||||
struct radeon_bo *bo = (struct radeon_bo*)buf;
|
struct radeon_bo *bo = (struct radeon_bo*)buf;
|
||||||
@@ -394,8 +394,11 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs,
|
|||||||
added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
|
added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
|
||||||
reloc->read_domains |= rd;
|
reloc->read_domains |= rd;
|
||||||
reloc->write_domain |= wd;
|
reloc->write_domain |= wd;
|
||||||
reloc->flags = MAX2(reloc->flags, priority);
|
|
||||||
cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority;
|
/* The priority must be in [0, 15]. It's used by the kernel memory management. */
|
||||||
|
unsigned bo_priority = util_last_bit(priority) / 2;
|
||||||
|
reloc->flags = MAX2(reloc->flags, bo_priority);
|
||||||
|
cs->csc->relocs_bo[index].u.real.priority_usage |= priority;
|
||||||
|
|
||||||
if (added_domains & RADEON_DOMAIN_VRAM)
|
if (added_domains & RADEON_DOMAIN_VRAM)
|
||||||
rcs->used_vram_kb += bo->base.size / 1024;
|
rcs->used_vram_kb += bo->base.size / 1024;
|
||||||
|
Reference in New Issue
Block a user