i915: Emit a single relocation per vbo
Reducing the number of relocations has lots of nice knock-on effects, not least reducing batch buffer size, auxiliary array sizes (vmalloced and copied into the kernel), processing of uncached relocations, etc. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
@@ -29,7 +29,6 @@
|
||||
#define I915CONTEXT_INC
|
||||
|
||||
#include "intel_context.h"
|
||||
#include "i915_reg.h"
|
||||
|
||||
#define I915_FALLBACK_TEXTURE 0x1000
|
||||
#define I915_FALLBACK_COLORMASK 0x2000
|
||||
@@ -126,6 +125,12 @@ enum {
|
||||
#define I915_MAX_CONSTANT 32
|
||||
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
|
||||
|
||||
#define I915_MAX_TEX_INDIRECT 4
|
||||
#define I915_MAX_TEX_INSN 32
|
||||
#define I915_MAX_ALU_INSN 64
|
||||
#define I915_MAX_DECL_INSN 27
|
||||
#define I915_MAX_TEMPORARY 16
|
||||
|
||||
#define I915_MAX_INSN (I915_MAX_DECL_INSN + \
|
||||
I915_MAX_TEX_INSN + \
|
||||
I915_MAX_ALU_INSN)
|
||||
@@ -264,6 +269,9 @@ struct i915_context
|
||||
|
||||
struct i915_fragment_program *current_program;
|
||||
|
||||
drm_intel_bo *current_vb_bo;
|
||||
unsigned int current_vertex_size;
|
||||
|
||||
struct i915_hw_state state;
|
||||
uint32_t last_draw_offset;
|
||||
GLuint last_sampler;
|
||||
|
@@ -1422,6 +1422,10 @@ i915ValidateFragmentProgram(struct i915_context *i915)
|
||||
intel->vertex_attr_count,
|
||||
intel->ViewportMatrix.m, 0);
|
||||
|
||||
assert(intel->prim.current_offset == intel->prim.start_offset);
|
||||
intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size;
|
||||
intel->prim.current_offset = intel->prim.start_offset;
|
||||
|
||||
intel->vertex_size >>= 2;
|
||||
|
||||
i915->state.Ctx[I915_CTXREG_LIS2] = s2;
|
||||
|
@@ -361,13 +361,6 @@
|
||||
/* p222 */
|
||||
|
||||
|
||||
#define I915_MAX_TEX_INDIRECT 4
|
||||
#define I915_MAX_TEX_INSN 32
|
||||
#define I915_MAX_ALU_INSN 64
|
||||
#define I915_MAX_DECL_INSN 27
|
||||
#define I915_MAX_TEMPORARY 16
|
||||
|
||||
|
||||
/* Each instruction is 3 dwords long, though most don't require all
|
||||
* this space. Maximum of 123 instructions. Smaller maxes per insn
|
||||
* type.
|
||||
|
@@ -678,6 +678,9 @@ i915_new_batch(struct intel_context *intel)
|
||||
i915->state.emitted = 0;
|
||||
i915->last_draw_offset = 0;
|
||||
i915->last_sampler = 0;
|
||||
|
||||
i915->current_vb_bo = NULL;
|
||||
i915->current_vertex_size = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -54,6 +54,7 @@
|
||||
#include "intel_span.h"
|
||||
#include "i830_context.h"
|
||||
#include "i830_reg.h"
|
||||
#include "i915_context.h"
|
||||
|
||||
static void intelRenderPrimitive(struct gl_context * ctx, GLenum prim);
|
||||
static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim,
|
||||
@@ -215,7 +216,7 @@ void intel_flush_prim(struct intel_context *intel)
|
||||
offset = intel->prim.start_offset;
|
||||
intel->prim.start_offset = intel->prim.current_offset;
|
||||
if (intel->gen < 3)
|
||||
intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
|
||||
intel->prim.current_offset = intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
|
||||
intel->prim.flush = NULL;
|
||||
|
||||
intel->vtbl.emit_state(intel);
|
||||
@@ -240,20 +241,39 @@ void intel_flush_prim(struct intel_context *intel)
|
||||
#endif
|
||||
|
||||
if (intel->gen >= 3) {
|
||||
BEGIN_BATCH(5);
|
||||
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
|
||||
I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
|
||||
assert((offset & ~S0_VB_OFFSET_MASK) == 0);
|
||||
OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, offset);
|
||||
OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
|
||||
(intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
|
||||
struct i915_context *i915 = i915_context(&intel->ctx);
|
||||
unsigned int cmd = 0, len = 0;
|
||||
|
||||
if (vb_bo != i915->current_vb_bo) {
|
||||
cmd |= I1_LOAD_S(0);
|
||||
len++;
|
||||
}
|
||||
|
||||
if (intel->vertex_size != i915->current_vertex_size) {
|
||||
cmd |= I1_LOAD_S(1);
|
||||
len++;
|
||||
}
|
||||
if (len)
|
||||
len++;
|
||||
|
||||
BEGIN_BATCH(2+len);
|
||||
if (cmd)
|
||||
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | cmd | (len - 2));
|
||||
if (vb_bo != i915->current_vb_bo) {
|
||||
OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
|
||||
i915->current_vb_bo = vb_bo;
|
||||
}
|
||||
if (intel->vertex_size != i915->current_vertex_size) {
|
||||
OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
|
||||
(intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
|
||||
i915->current_vertex_size = intel->vertex_size;
|
||||
}
|
||||
OUT_BATCH(_3DPRIMITIVE |
|
||||
PRIM_INDIRECT |
|
||||
PRIM_INDIRECT_SEQUENTIAL |
|
||||
intel->prim.primitive |
|
||||
count);
|
||||
OUT_BATCH(0); /* Beginning vertex index */
|
||||
OUT_BATCH(offset / (intel->vertex_size * 4));
|
||||
ADVANCE_BATCH();
|
||||
} else {
|
||||
struct i830_context *i830 = i830_context(&intel->ctx);
|
||||
|
Reference in New Issue
Block a user