i915: Emit a single relocation per vbo
Reducing the number of relocations has lots of nice knock-on effects, not least including reducing batch buffer size, auxiliary array sizes (vmalloced and copied into the kernel), processing of uncached relocations, etc. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
@@ -29,7 +29,6 @@
|
|||||||
#define I915CONTEXT_INC
|
#define I915CONTEXT_INC
|
||||||
|
|
||||||
#include "intel_context.h"
|
#include "intel_context.h"
|
||||||
#include "i915_reg.h"
|
|
||||||
|
|
||||||
#define I915_FALLBACK_TEXTURE 0x1000
|
#define I915_FALLBACK_TEXTURE 0x1000
|
||||||
#define I915_FALLBACK_COLORMASK 0x2000
|
#define I915_FALLBACK_COLORMASK 0x2000
|
||||||
@@ -126,6 +125,12 @@ enum {
|
|||||||
#define I915_MAX_CONSTANT 32
|
#define I915_MAX_CONSTANT 32
|
||||||
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
|
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
|
||||||
|
|
||||||
|
#define I915_MAX_TEX_INDIRECT 4
|
||||||
|
#define I915_MAX_TEX_INSN 32
|
||||||
|
#define I915_MAX_ALU_INSN 64
|
||||||
|
#define I915_MAX_DECL_INSN 27
|
||||||
|
#define I915_MAX_TEMPORARY 16
|
||||||
|
|
||||||
#define I915_MAX_INSN (I915_MAX_DECL_INSN + \
|
#define I915_MAX_INSN (I915_MAX_DECL_INSN + \
|
||||||
I915_MAX_TEX_INSN + \
|
I915_MAX_TEX_INSN + \
|
||||||
I915_MAX_ALU_INSN)
|
I915_MAX_ALU_INSN)
|
||||||
@@ -264,6 +269,9 @@ struct i915_context
|
|||||||
|
|
||||||
struct i915_fragment_program *current_program;
|
struct i915_fragment_program *current_program;
|
||||||
|
|
||||||
|
drm_intel_bo *current_vb_bo;
|
||||||
|
unsigned int current_vertex_size;
|
||||||
|
|
||||||
struct i915_hw_state state;
|
struct i915_hw_state state;
|
||||||
uint32_t last_draw_offset;
|
uint32_t last_draw_offset;
|
||||||
GLuint last_sampler;
|
GLuint last_sampler;
|
||||||
|
@@ -1422,6 +1422,10 @@ i915ValidateFragmentProgram(struct i915_context *i915)
|
|||||||
intel->vertex_attr_count,
|
intel->vertex_attr_count,
|
||||||
intel->ViewportMatrix.m, 0);
|
intel->ViewportMatrix.m, 0);
|
||||||
|
|
||||||
|
assert(intel->prim.current_offset == intel->prim.start_offset);
|
||||||
|
intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size;
|
||||||
|
intel->prim.current_offset = intel->prim.start_offset;
|
||||||
|
|
||||||
intel->vertex_size >>= 2;
|
intel->vertex_size >>= 2;
|
||||||
|
|
||||||
i915->state.Ctx[I915_CTXREG_LIS2] = s2;
|
i915->state.Ctx[I915_CTXREG_LIS2] = s2;
|
||||||
|
@@ -361,13 +361,6 @@
|
|||||||
/* p222 */
|
/* p222 */
|
||||||
|
|
||||||
|
|
||||||
#define I915_MAX_TEX_INDIRECT 4
|
|
||||||
#define I915_MAX_TEX_INSN 32
|
|
||||||
#define I915_MAX_ALU_INSN 64
|
|
||||||
#define I915_MAX_DECL_INSN 27
|
|
||||||
#define I915_MAX_TEMPORARY 16
|
|
||||||
|
|
||||||
|
|
||||||
/* Each instruction is 3 dwords long, though most don't require all
|
/* Each instruction is 3 dwords long, though most don't require all
|
||||||
* this space. Maximum of 123 instructions. Smaller maxes per insn
|
* this space. Maximum of 123 instructions. Smaller maxes per insn
|
||||||
* type.
|
* type.
|
||||||
|
@@ -678,6 +678,9 @@ i915_new_batch(struct intel_context *intel)
|
|||||||
i915->state.emitted = 0;
|
i915->state.emitted = 0;
|
||||||
i915->last_draw_offset = 0;
|
i915->last_draw_offset = 0;
|
||||||
i915->last_sampler = 0;
|
i915->last_sampler = 0;
|
||||||
|
|
||||||
|
i915->current_vb_bo = NULL;
|
||||||
|
i915->current_vertex_size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@@ -54,6 +54,7 @@
|
|||||||
#include "intel_span.h"
|
#include "intel_span.h"
|
||||||
#include "i830_context.h"
|
#include "i830_context.h"
|
||||||
#include "i830_reg.h"
|
#include "i830_reg.h"
|
||||||
|
#include "i915_context.h"
|
||||||
|
|
||||||
static void intelRenderPrimitive(struct gl_context * ctx, GLenum prim);
|
static void intelRenderPrimitive(struct gl_context * ctx, GLenum prim);
|
||||||
static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim,
|
static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim,
|
||||||
@@ -215,7 +216,7 @@ void intel_flush_prim(struct intel_context *intel)
|
|||||||
offset = intel->prim.start_offset;
|
offset = intel->prim.start_offset;
|
||||||
intel->prim.start_offset = intel->prim.current_offset;
|
intel->prim.start_offset = intel->prim.current_offset;
|
||||||
if (intel->gen < 3)
|
if (intel->gen < 3)
|
||||||
intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
|
intel->prim.current_offset = intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
|
||||||
intel->prim.flush = NULL;
|
intel->prim.flush = NULL;
|
||||||
|
|
||||||
intel->vtbl.emit_state(intel);
|
intel->vtbl.emit_state(intel);
|
||||||
@@ -240,20 +241,39 @@ void intel_flush_prim(struct intel_context *intel)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (intel->gen >= 3) {
|
if (intel->gen >= 3) {
|
||||||
BEGIN_BATCH(5);
|
struct i915_context *i915 = i915_context(&intel->ctx);
|
||||||
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
|
unsigned int cmd = 0, len = 0;
|
||||||
I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
|
|
||||||
assert((offset & ~S0_VB_OFFSET_MASK) == 0);
|
if (vb_bo != i915->current_vb_bo) {
|
||||||
OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, offset);
|
cmd |= I1_LOAD_S(0);
|
||||||
|
len++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (intel->vertex_size != i915->current_vertex_size) {
|
||||||
|
cmd |= I1_LOAD_S(1);
|
||||||
|
len++;
|
||||||
|
}
|
||||||
|
if (len)
|
||||||
|
len++;
|
||||||
|
|
||||||
|
BEGIN_BATCH(2+len);
|
||||||
|
if (cmd)
|
||||||
|
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | cmd | (len - 2));
|
||||||
|
if (vb_bo != i915->current_vb_bo) {
|
||||||
|
OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
|
||||||
|
i915->current_vb_bo = vb_bo;
|
||||||
|
}
|
||||||
|
if (intel->vertex_size != i915->current_vertex_size) {
|
||||||
OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
|
OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
|
||||||
(intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
|
(intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
|
||||||
|
i915->current_vertex_size = intel->vertex_size;
|
||||||
|
}
|
||||||
OUT_BATCH(_3DPRIMITIVE |
|
OUT_BATCH(_3DPRIMITIVE |
|
||||||
PRIM_INDIRECT |
|
PRIM_INDIRECT |
|
||||||
PRIM_INDIRECT_SEQUENTIAL |
|
PRIM_INDIRECT_SEQUENTIAL |
|
||||||
intel->prim.primitive |
|
intel->prim.primitive |
|
||||||
count);
|
count);
|
||||||
OUT_BATCH(0); /* Beginning vertex index */
|
OUT_BATCH(offset / (intel->vertex_size * 4));
|
||||||
ADVANCE_BATCH();
|
ADVANCE_BATCH();
|
||||||
} else {
|
} else {
|
||||||
struct i830_context *i830 = i830_context(&intel->ctx);
|
struct i830_context *i830 = i830_context(&intel->ctx);
|
||||||
|
Reference in New Issue
Block a user