radeonsi: use a global dirty mask for shader pointers

Only vertex buffers use a separate bool flag.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák
2017-01-17 21:30:23 +01:00
parent 861d7af1cb
commit cf248929bf
4 changed files with 52 additions and 42 deletions

View File

@@ -246,7 +246,6 @@ static bool si_upload_descriptors(struct si_context *sctx,
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
}
desc->pointer_dirty = true;
desc->dirty_mask = 0;
if (atom)
@@ -1035,9 +1034,9 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
* on performance (confirmed by testing). New descriptors are always
* uploaded to a fresh new buffer, so I don't think flushing the const
* cache is needed. */
desc->pointer_dirty = true;
si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
sctx->vertex_buffers_dirty = false;
sctx->vertex_buffer_pointer_dirty = true;
return true;
}
@@ -1735,26 +1734,21 @@ void si_update_all_texture_descriptors(struct si_context *sctx)
static void si_mark_shader_pointers_dirty(struct si_context *sctx,
unsigned shader)
{
struct si_descriptors *descs =
&sctx->descriptors[SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS];
for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
descs->pointer_dirty = true;
sctx->shader_pointers_dirty |=
u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
SI_NUM_SHADER_DESCS);
if (shader == PIPE_SHADER_VERTEX)
sctx->vertex_buffers.pointer_dirty = true;
sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
}
static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
{
int i;
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_mark_shader_pointers_dirty(sctx, i);
}
sctx->descriptors[SI_DESCS_RW_BUFFERS].pointer_dirty = true;
sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
}
/* Set a base register address for user data constants in the given shader.
@@ -1807,13 +1801,12 @@ void si_shader_change_notify(struct si_context *sctx)
static void si_emit_shader_pointer(struct si_context *sctx,
struct si_descriptors *desc,
unsigned sh_base, bool keep_dirty)
unsigned sh_base)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
uint64_t va;
if (!desc->pointer_dirty || !desc->buffer)
return;
assert(desc->buffer);
va = desc->buffer->gpu_address +
desc->buffer_offset;
@@ -1822,55 +1815,66 @@ static void si_emit_shader_pointer(struct si_context *sctx,
radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
desc->pointer_dirty = keep_dirty;
}
void si_emit_graphics_shader_userdata(struct si_context *sctx,
struct r600_atom *atom)
{
unsigned shader;
unsigned mask;
uint32_t *sh_base = sctx->shader_userdata.sh_base;
struct si_descriptors *descs;
descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
if (descs->pointer_dirty) {
if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
si_emit_shader_pointer(sctx, descs,
R_00B030_SPI_SHADER_USER_DATA_PS_0, true);
R_00B030_SPI_SHADER_USER_DATA_PS_0);
si_emit_shader_pointer(sctx, descs,
R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
R_00B130_SPI_SHADER_USER_DATA_VS_0);
si_emit_shader_pointer(sctx, descs,
R_00B230_SPI_SHADER_USER_DATA_GS_0, true);
R_00B230_SPI_SHADER_USER_DATA_GS_0);
si_emit_shader_pointer(sctx, descs,
R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
R_00B330_SPI_SHADER_USER_DATA_ES_0);
si_emit_shader_pointer(sctx, descs,
R_00B430_SPI_SHADER_USER_DATA_HS_0, true);
descs->pointer_dirty = false;
R_00B430_SPI_SHADER_USER_DATA_HS_0);
}
descs = &sctx->descriptors[SI_DESCS_FIRST_SHADER];
mask = sctx->shader_pointers_dirty &
u_bit_consecutive(SI_DESCS_FIRST_SHADER,
SI_DESCS_FIRST_COMPUTE - SI_DESCS_FIRST_SHADER);
for (shader = 0; shader < SI_NUM_GRAPHICS_SHADERS; shader++) {
while (mask) {
unsigned i = u_bit_scan(&mask);
unsigned shader = (i - SI_DESCS_FIRST_SHADER) / SI_NUM_SHADER_DESCS;
unsigned base = sh_base[shader];
unsigned i;
if (!base)
continue;
for (i = 0; i < SI_NUM_SHADER_DESCS; i++, descs++)
si_emit_shader_pointer(sctx, descs, base, false);
if (base)
si_emit_shader_pointer(sctx, descs + i, base);
}
sctx->shader_pointers_dirty &=
~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
if (sctx->vertex_buffer_pointer_dirty) {
si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
sh_base[PIPE_SHADER_VERTEX]);
sctx->vertex_buffer_pointer_dirty = false;
}
si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
}
void si_emit_compute_shader_userdata(struct si_context *sctx)
{
unsigned base = R_00B900_COMPUTE_USER_DATA_0;
struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_FIRST_COMPUTE];
struct si_descriptors *descs = sctx->descriptors;
unsigned compute_mask =
u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_SHADER_DESCS);
unsigned mask = sctx->shader_pointers_dirty & compute_mask;
for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
si_emit_shader_pointer(sctx, descs, base, false);
while (mask) {
unsigned i = u_bit_scan(&mask);
si_emit_shader_pointer(sctx, descs + i, base);
}
sctx->shader_pointers_dirty &= ~compute_mask;
}
/* INIT/DEINIT/UPLOAD */
@@ -1939,6 +1943,9 @@ bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
unsigned dirty = sctx->descriptors_dirty & mask;
/* Assume nothing will go wrong: */
sctx->shader_pointers_dirty |= dirty;
while (dirty) {
unsigned i = u_bit_scan(&dirty);
@@ -1960,6 +1967,9 @@ bool si_upload_compute_shader_descriptors(struct si_context *sctx)
SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
unsigned dirty = sctx->descriptors_dirty & mask;
/* Assume nothing will go wrong: */
sctx->shader_pointers_dirty |= dirty;
while (dirty) {
unsigned i = u_bit_scan(&dirty);

View File

@@ -268,6 +268,7 @@ struct si_context {
struct si_descriptors vertex_buffers;
struct si_descriptors descriptors[SI_NUM_DESCS];
unsigned descriptors_dirty;
unsigned shader_pointers_dirty;
unsigned compressed_tex_shader_mask;
struct si_buffer_resources rw_buffers;
struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
@@ -288,6 +289,7 @@ struct si_context {
/* Vertex and index buffers. */
bool vertex_buffers_dirty;
bool vertex_buffer_pointer_dirty;
struct pipe_index_buffer index_buffer;
struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS];

View File

@@ -237,8 +237,6 @@ struct si_descriptors {
/* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
* array will be stored. */
unsigned shader_userdata_offset;
/* Whether the pointer should be re-emitted. */
bool pointer_dirty;
};
struct si_sampler_views {

View File

@@ -1146,7 +1146,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
/* Vertex buffer descriptors are uploaded uncached, so prefetch
* them right after the VS binary. */
if (sctx->vertex_buffers.pointer_dirty) {
if (sctx->vertex_buffer_pointer_dirty) {
cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
sctx->vertex_buffers.buffer_offset,
sctx->vertex_elements->count * 16);