i915: Emit a single relocation per vbo

Reducing the number of relocations has lots of nice knock-on effects,
not least reducing the batch buffer size, the auxiliary array sizes
(vmalloced and copied into the kernel), the processing of uncached
relocations, etc.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson
2010-11-25 15:41:37 +00:00
parent 298ebb78de
commit 2c6793fb6b
5 changed files with 45 additions and 17 deletions

View File

@@ -29,7 +29,6 @@
#define I915CONTEXT_INC
#include "intel_context.h"
#include "i915_reg.h"
#define I915_FALLBACK_TEXTURE 0x1000
#define I915_FALLBACK_COLORMASK 0x2000
@@ -126,6 +125,12 @@ enum {
#define I915_MAX_CONSTANT 32
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
#define I915_MAX_TEX_INDIRECT 4
#define I915_MAX_TEX_INSN 32
#define I915_MAX_ALU_INSN 64
#define I915_MAX_DECL_INSN 27
#define I915_MAX_TEMPORARY 16
#define I915_MAX_INSN (I915_MAX_DECL_INSN + \
I915_MAX_TEX_INSN + \
I915_MAX_ALU_INSN)
@@ -264,6 +269,9 @@ struct i915_context
struct i915_fragment_program *current_program;
drm_intel_bo *current_vb_bo;
unsigned int current_vertex_size;
struct i915_hw_state state;
uint32_t last_draw_offset;
GLuint last_sampler;

View File

@@ -1422,6 +1422,10 @@ i915ValidateFragmentProgram(struct i915_context *i915)
intel->vertex_attr_count,
intel->ViewportMatrix.m, 0);
assert(intel->prim.current_offset == intel->prim.start_offset);
intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size;
intel->prim.current_offset = intel->prim.start_offset;
intel->vertex_size >>= 2;
i915->state.Ctx[I915_CTXREG_LIS2] = s2;

View File

@@ -361,13 +361,6 @@
/* p222 */
#define I915_MAX_TEX_INDIRECT 4
#define I915_MAX_TEX_INSN 32
#define I915_MAX_ALU_INSN 64
#define I915_MAX_DECL_INSN 27
#define I915_MAX_TEMPORARY 16
/* Each instruction is 3 dwords long, though most don't require all
* this space. Maximum of 123 instructions. Smaller maxes per insn
* type.

View File

@@ -678,6 +678,9 @@ i915_new_batch(struct intel_context *intel)
i915->state.emitted = 0;
i915->last_draw_offset = 0;
i915->last_sampler = 0;
i915->current_vb_bo = NULL;
i915->current_vertex_size = 0;
}
static void

View File

@@ -54,6 +54,7 @@
#include "intel_span.h"
#include "i830_context.h"
#include "i830_reg.h"
#include "i915_context.h"
static void intelRenderPrimitive(struct gl_context * ctx, GLenum prim);
static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim,
@@ -215,7 +216,7 @@ void intel_flush_prim(struct intel_context *intel)
offset = intel->prim.start_offset;
intel->prim.start_offset = intel->prim.current_offset;
if (intel->gen < 3)
intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
intel->prim.current_offset = intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
intel->prim.flush = NULL;
intel->vtbl.emit_state(intel);
@@ -240,20 +241,39 @@ void intel_flush_prim(struct intel_context *intel)
#endif
if (intel->gen >= 3) {
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
assert((offset & ~S0_VB_OFFSET_MASK) == 0);
OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, offset);
struct i915_context *i915 = i915_context(&intel->ctx);
unsigned int cmd = 0, len = 0;
if (vb_bo != i915->current_vb_bo) {
cmd |= I1_LOAD_S(0);
len++;
}
if (intel->vertex_size != i915->current_vertex_size) {
cmd |= I1_LOAD_S(1);
len++;
}
if (len)
len++;
BEGIN_BATCH(2+len);
if (cmd)
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | cmd | (len - 2));
if (vb_bo != i915->current_vb_bo) {
OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
i915->current_vb_bo = vb_bo;
}
if (intel->vertex_size != i915->current_vertex_size) {
OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
(intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
i915->current_vertex_size = intel->vertex_size;
}
OUT_BATCH(_3DPRIMITIVE |
PRIM_INDIRECT |
PRIM_INDIRECT_SEQUENTIAL |
intel->prim.primitive |
count);
OUT_BATCH(0); /* Beginning vertex index */
OUT_BATCH(offset / (intel->vertex_size * 4));
ADVANCE_BATCH();
} else {
struct i830_context *i830 = i830_context(&intel->ctx);