vbo/dlist: use a shared index buffer

Draws can be merged by u_threaded if they share the same index buffer (IB).

This improves performance in SPECviewperf13 snx-03: test frame rates
improve by a factor of 1.2x to 2.0x.

v2: reworked error handling

Reviewed-by: Marek Olšák <marek.olsak@amd.com> (v2)
Reviewed-by: Zoltán Böszörményi <zboszor@gmail.com> (v2)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8111>
This commit is contained in:
Pierre-Eric Pelloux-Prayer
2020-12-07 17:34:18 +01:00
parent a0314083be
commit 4c751ad67a
4 changed files with 40 additions and 20 deletions

View File

@@ -178,6 +178,8 @@ struct vbo_save_context {
struct vbo_save_vertex_store *vertex_store;
struct vbo_save_primitive_store *prim_store;
struct gl_buffer_object *previous_ib;
unsigned ib_first_free_index;
fi_type *buffer_map; /**< Mapping of vertex_store's buffer */
fi_type *buffer_ptr; /**< cursor, points into buffer_map */

View File

@@ -70,4 +70,6 @@ void vbo_save_destroy( struct gl_context *ctx )
free(save->vertex_store);
save->vertex_store = NULL;
}
_mesa_reference_buffer_object(ctx, &save->previous_ib, NULL);
}

View File

@@ -137,6 +137,7 @@ _vbo_save_get_vertex_count(const struct vbo_save_vertex_list *node)
#define VBO_SAVE_BUFFER_SIZE (256*1024) /* dwords */
#define VBO_SAVE_PRIM_SIZE 128
#define VBO_SAVE_PRIM_MODE_MASK 0x3f
#define VBO_SAVE_INDEX_SIZE (32 * 1024)
struct vbo_save_vertex_store {
struct gl_buffer_object *bufferobj;

View File

@@ -649,7 +649,7 @@ compile_vertex_list(struct gl_context *ctx)
/* Create an index buffer. */
node->min_index = node->max_index = 0;
if (save->vert_count) {
if (save->vert_count && node->prim_count) {
/* We won't modify node->prims, so use a const alias to avoid unintended
* writes to it. */
const struct _mesa_prim *original_prims = node->prims;
@@ -665,6 +665,13 @@ compile_vertex_list(struct gl_context *ctx)
* wrap_buffers may call use but the last primitive may not be complete) */
int max_indices_count = MAX2(total_vert_count * 2 - (node->prim_count * 2) + 1,
total_vert_count);
int indices_offset = 0;
int available = save->previous_ib ? (save->previous_ib->Size / 4 - save->ib_first_free_index) : 0;
if (available >= max_indices_count) {
indices_offset = save->ib_first_free_index;
node->min_index = node->max_index = indices_offset;
}
int size = max_indices_count * sizeof(uint32_t);
uint32_t* indices = (uint32_t*) malloc(size);
uint32_t max_index = 0, min_index = 0xFFFFFFFF;
@@ -750,16 +757,13 @@ compile_vertex_list(struct gl_context *ctx)
assert(last_valid_prim <= i);
node->merged.prims = realloc(node->merged.prims, (1 + last_valid_prim) * sizeof(struct _mesa_prim));
node->merged.prims[last_valid_prim] = original_prims[i];
node->merged.prims[last_valid_prim].start = start;
node->merged.prims[last_valid_prim].start = indices_offset + start;
node->merged.prims[last_valid_prim].count = idx - start;
}
node->merged.prims[last_valid_prim].mode = mode;
}
if (idx == 0)
goto skip_node;
assert(idx <= max_indices_count);
assert(idx > 0 && idx <= max_indices_count);
node->merged.prim_count = last_valid_prim + 1;
node->merged.ib.ptr = NULL;
@@ -768,25 +772,36 @@ compile_vertex_list(struct gl_context *ctx)
node->merged.min_index = min_index;
node->merged.max_index = max_index;
node->merged.ib.obj = ctx->Driver.NewBufferObject(ctx, VBO_BUF_ID + 1);
bool success = ctx->Driver.BufferData(ctx,
if (!indices_offset) {
/* Allocate a new index buffer */
_mesa_reference_buffer_object(ctx, &save->previous_ib, NULL);
save->previous_ib = ctx->Driver.NewBufferObject(ctx, VBO_BUF_ID + 1);
bool success = ctx->Driver.BufferData(ctx,
GL_ELEMENT_ARRAY_BUFFER_ARB,
idx * sizeof(uint32_t), indices,
MAX2(VBO_SAVE_INDEX_SIZE, idx) * sizeof(uint32_t),
NULL,
GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT,
node->merged.ib.obj);
save->previous_ib);
if (!success) {
_mesa_reference_buffer_object(ctx, &save->previous_ib, NULL);
_mesa_error(ctx, GL_OUT_OF_MEMORY, "IB allocation");
}
}
if (success)
goto out;
_mesa_reference_buffer_object(ctx, &node->merged.ib.obj, save->previous_ib);
ctx->Driver.DeleteBuffer(ctx, node->merged.ib.obj);
_mesa_error(ctx, GL_OUT_OF_MEMORY, "IB allocation");
if (node->merged.ib.obj) {
ctx->Driver.BufferSubData(ctx,
indices_offset * sizeof(uint32_t),
idx * sizeof(uint32_t),
indices,
node->merged.ib.obj);
save->ib_first_free_index = indices_offset + idx;
} else {
node->vertex_count = 0;
node->prim_count = 0;
}
skip_node:
node->merged.ib.obj = NULL;
node->vertex_count = 0;
node->prim_count = 0;
out:
free(indices);
}