/*
 * Provenance (repository viewer metadata):
 *   File:   third_party_mesa3d/src/mesa/drivers/dri/i915/intel_batchbuffer.c
 *   Commit: 86740f1593 (Dave Airlie), 2006-05-05 06:53:11 +00:00
 *           "remove temporary code from keithw, this in theory isn't used
 *            at the moment anyways unless INTEL_BATCH is turned on"
 *   Size:   785 lines, 19 KiB, C
 */

/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include <stdio.h>
#include <errno.h>
#include "mtypes.h"
#include "context.h"
#include "enums.h"
#include "vblank.h"
#include "intel_reg.h"
#include "intel_batchbuffer.h"
#include "intel_context.h"
/* ================================================================
* Performance monitoring functions
*/
/* Fill a w x h box in the back buffer with the colour (r, g, b) and an
 * alpha argument of 0xff.
 *
 * (x, y) is relative to the drawable origin (intel->drawX / drawY is
 * added here).  The box is silently skipped unless it starts at
 * non-negative screen coordinates and ends strictly inside the screen.
 * The "Locked" blit helper is used, so the caller is expected to hold
 * the hardware lock.
 */
static void intel_fill_box( intelContextPtr intel,
			    GLshort x, GLshort y,
			    GLshort w, GLshort h,
			    GLubyte r, GLubyte g, GLubyte b )
{
   /* Translate to screen coordinates. */
   x += intel->drawX;
   y += intel->drawY;

   /* Reject anything that is not entirely on screen. */
   if (x < 0 || y < 0)
      return;
   if (x+w >= intel->intelScreen->width)
      return;
   if (y+h >= intel->intelScreen->height)
      return;

   intelEmitFillBlitLocked( intel,
			    intel->intelScreen->cpp,
			    intel->intelScreen->back.pitch,
			    intel->intelScreen->back.offset,
			    x, y, w, h,
			    INTEL_PACKCOLOR(intel->intelScreen->fbFormat,
					    r,g,b,0xff));
}
/* Draw one small coloured 8x8 box per performance indicator along the
 * top of the drawable (y = 4), then reset intel->perf_boxes.
 */
static void intel_draw_performance_boxes( intelContextPtr intel )
{
   /* Indicators are drawn in table order.  draw_if_set selects whether
    * the box appears when the flag is set (1) or when it is clear (0).
    */
   static const struct {
      unsigned flag;
      int draw_if_set;
      int x;
      int r, g, b;
   } indicators[] = {
      /* Purple box for page flipping */
      { I830_BOX_FLIP,         1,  4, 255,   0, 255 },
      /* Red box if we have to wait for idle at any point */
      { I830_BOX_WAIT,         1, 16, 255,   0,   0 },
      /* Blue box: lost context? */
      { I830_BOX_LOST_CONTEXT, 1, 28,   0,   0, 255 },
      /* Yellow box for texture swaps */
      { I830_BOX_TEXTURE_LOAD, 1, 40, 255, 255,   0 },
      /* Green box if hardware never idles (as far as we can tell) */
      { I830_BOX_RING_EMPTY,   0, 64,   0, 255,   0 },
   };
   unsigned idx;

   for (idx = 0; idx < sizeof(indicators) / sizeof(indicators[0]); idx++) {
      const int flag_is_set =
	 (intel->perf_boxes & indicators[idx].flag) != 0;

      if (flag_is_set == indicators[idx].draw_if_set)
	 intel_fill_box( intel, indicators[idx].x, 4, 8, 8,
			 indicators[idx].r,
			 indicators[idx].g,
			 indicators[idx].b );
   }

   /* Draw bars indicating number of buffers allocated
    * (not a great measure, easily confused)
    */
#if 0
   if (intel->dma_used) {
      int bar = intel->dma_used / 10240;
      if (bar > 100) bar = 100;
      if (bar < 1) bar = 1;
      intel_fill_box( intel, 4, 16, bar, 4, 196, 128, 128 );
      intel->dma_used = 0;
   }
#endif

   intel->perf_boxes = 0;
}
/* Return non-zero when `nr` vertices cannot form a valid primitive of
 * the type encoded in `primitive` (only the PRIM3D_MASK bits are
 * examined).  Unknown primitive types are always reported as bad.
 */
static int bad_prim_vertex_nr( int primitive, int nr )
{
   int bad;

   switch (primitive & PRIM3D_MASK) {
   case PRIM3D_POINTLIST:
      /* At least one point. */
      bad = nr < 1;
      break;

   case PRIM3D_LINELIST:
      /* A non-zero, even number of vertices. */
      bad = (nr & 1) || nr == 0;
      break;

   case PRIM3D_LINESTRIP:
      /* At least one segment. */
      bad = nr < 2;
      break;

   case PRIM3D_TRILIST:
   case PRIM3D_RECTLIST:
      /* A non-zero multiple of three vertices. */
      bad = nr % 3 || nr == 0;
      break;

   case PRIM3D_POLY:
   case PRIM3D_TRIFAN:
   case PRIM3D_TRISTRIP:
   case PRIM3D_TRISTRIP_RVRSE:
      /* At least one triangle. */
      bad = nr < 3;
      break;

   default:
      bad = 1;
      break;
   }

   return bad;
}
/* Finish the inline primitive that is currently being accumulated.
 *
 * Everything written to the batch since intel->prim.start_ptr is treated
 * as one header dword followed by vertex data.  If the data passes the
 * sanity checks below, the header dword is patched with the 3DPRIMITIVE
 * command; otherwise the batch pointer is rewound to start_ptr and the
 * primitive is dropped.  In both cases the in-progress primitive state
 * (primitive, start_ptr, flush callback) is cleared.
 */
static void intel_flush_inline_primitive( GLcontext *ctx )
{
   intelContextPtr intel = INTEL_CONTEXT( ctx );
   /* Bytes emitted since the header slot, including the header itself. */
   GLuint used = intel->batch.ptr - intel->prim.start_ptr;
   GLuint vertcount;

   assert(intel->prim.primitive != ~0);

   /* Need the header dword plus at least one dword of payload.  This
    * must be tested before the arithmetic below: `used` is unsigned, so
    * `used - 4` would wrap around for used < 4.
    */
   if (used < 8)
      goto do_discard;

   /* Check vertex size against the vertex we're specifying to
    * hardware.  If it's wrong, ditch the primitive.
    */
   if (!intel->vtbl.check_vertex_size( intel, intel->vertex_size ))
      goto do_discard;

   /* vertex_size is in dwords; the payload excludes the 4-byte header. */
   vertcount = (used - 4) / (intel->vertex_size * 4);

   if (!vertcount)
      goto do_discard;

   /* The payload must be a whole number of vertices. */
   if (vertcount * intel->vertex_size * 4 != used - 4) {
      fprintf(stderr, "vertex size confusion %u %u\n", used,
	      intel->vertex_size * vertcount * 4);
      goto do_discard;
   }

   if (bad_prim_vertex_nr( intel->prim.primitive, vertcount )) {
      fprintf(stderr, "bad_prim_vertex_nr %x %u\n", intel->prim.primitive,
	      vertcount);
      goto do_discard;
   }

   /* Patch the reserved slot; the length field is total dwords minus 2. */
   *(int *)intel->prim.start_ptr = (_3DPRIMITIVE |
				    intel->prim.primitive |
				    (used/4-2));

   goto finished;

 do_discard:
   /* Give back everything written since the header slot. */
   intel->batch.ptr -= used;
   intel->batch.space += used;
   assert(intel->batch.space >= 0);

 finished:
   intel->prim.primitive = ~0;
   intel->prim.start_ptr = 0;
   intel->prim.flush = 0;
}
/* Begin a new inline primitive of type `prim`.
 *
 * Any primitive still in progress is fired, outstanding state is
 * emitted, and a one-dword header slot is reserved in the batch.  The
 * slot is left as zero here; intel_flush_inline_primitive() is
 * registered as intel->prim.flush and is the function that patches it.
 * Vertex data is expected to be appended to the batch after this call.
 */
void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim )
{
BATCH_LOCALS;
/* Debug trace, compiled in but disabled. */
if (0)
fprintf(stderr, "%s %x\n", __FUNCTION__, prim);
/* Finish any in-progress primitive:
*/
INTEL_FIREVERTICES( intel );
/* Emit outstanding state:
*/
intel->vtbl.emit_state( intel );
/* Make sure there is some space in this buffer (at least ten vertices'
 * worth, vertex_size being in dwords).  After a flush the state is
 * emitted again.
*/
if (intel->vertex_size * 10 * sizeof(GLuint) >= intel->batch.space) {
intelFlushBatch(intel, GL_TRUE);
intel->vtbl.emit_state( intel );
}
/* If bit 2 of the write pointer is set (pointer is 4 mod 8), pad with
 * one zero dword so the pointer becomes 8-byte aligned.
 * NOTE(review): presumably done so that the vertex data following the
 * two dwords emitted below starts on an 8-byte boundary - confirm.
 */
#if 1
if (((unsigned long)intel->batch.ptr) & 0x4) {
BEGIN_BATCH(1);
OUT_BATCH(0);
ADVANCE_BATCH();
}
#endif
/* Emit a slot which will be filled with the inline primitive
* command later.  Two zero dwords are written; start_ptr is taken
* after the first one, so the second dword is the header slot.
* (batch_ptr is presumably the cursor declared by BATCH_LOCALS and
* advanced by OUT_BATCH - confirm in intel_batchbuffer.h.)
*/
BEGIN_BATCH(2);
OUT_BATCH( 0 );
intel->prim.start_ptr = batch_ptr;
intel->prim.primitive = prim;
intel->prim.flush = intel_flush_inline_primitive;
intel->batch.contains_geometry = 1;
OUT_BATCH( 0 );
ADVANCE_BATCH();
}
/* Close the current inline primitive, flush the batch and open a new
 * inline primitive of the same type.
 */
void intelRestartInlinePrimitive( intelContextPtr intel )
{
   /* Remember the type first: flushing the primitive resets it. */
   const GLuint saved_prim = intel->prim.primitive;

   intel_flush_inline_primitive( &intel->ctx );

   /* GL_TRUE - is critical */
   intelFlushBatch( intel, GL_TRUE );

   intelStartInlinePrimitive( intel, saved_prim );
}
/* Finish the inline primitive in progress, flush the batch, then start
 * a new inline primitive of the same type.
 */
void intelWrapInlinePrimitive( intelContextPtr intel )
{
   /* Saved up front because flushing the primitive clears it. */
   const GLuint saved_prim = intel->prim.primitive;

   /* Debug trace, compiled in but disabled. */
   if (0)
      fprintf(stderr, "%s\n", __FUNCTION__);

   intel_flush_inline_primitive( &intel->ctx );
   intelFlushBatch( intel, GL_TRUE );
   intelStartInlinePrimitive( intel, saved_prim );
}
/* Emit a 3DPRIMITIVE command followed by room for inline vertex data.
 *
 * primitive    primitive type bits OR'ed into the command
 * dwords       size of the vertex data in dwords
 * vertex_size  size of a single vertex in dwords
 *
 * Returns a pointer to the `dwords` dwords reserved in the batch for the
 * caller to fill with vertices, or NULL when the request is rejected
 * (vertex size not accepted, not a whole number of vertices, invalid
 * vertex count for the primitive, or fewer than two dwords).  State is
 * emitted, and the batch possibly flushed, even when NULL is returned.
 */
GLuint *intelEmitInlinePrimitiveLocked(intelContextPtr intel,
				       int primitive,
				       int dwords,
				       int vertex_size )
{
   GLuint *vertex_space;
   int nr_verts;
   BATCH_LOCALS;

   /* Debug trace, compiled in but disabled. */
   if (0)
      fprintf(stderr, "%s 0x%x %d\n", __FUNCTION__, primitive, dwords);

   /* Emit outstanding state, again after a flush if the command plus
    * its payload does not fit in the remaining space.
    */
   intel->vtbl.emit_state( intel );

   if ((1+dwords)*4 >= intel->batch.space) {
      intelFlushBatch(intel, GL_TRUE);
      intel->vtbl.emit_state( intel );
   }

   /* Check vertex size against the vertex we're specifying to
    * hardware.  If it's wrong, ditch the primitive.
    */
   if (!intel->vtbl.check_vertex_size( intel, vertex_size ))
      return NULL;

   nr_verts = dwords / vertex_size;

   if (dwords % vertex_size) {
      fprintf(stderr, "did not request a whole number of vertices\n");
      return NULL;
   }

   if (bad_prim_vertex_nr( primitive, nr_verts )) {
      fprintf(stderr, "bad_prim_vertex_nr %x %d\n", primitive, nr_verts);
      return NULL;
   }

   /* Less than eight bytes of payload is never accepted. */
   if (dwords * 4 < 8)
      return NULL;

   /* Emit 3D_PRIMITIVE commands:
    */
   BEGIN_BATCH(1 + dwords);
   OUT_BATCH( _3DPRIMITIVE |
	      primitive |
	      (dwords-1) );

   /* Hand the payload area to the caller and step over it. */
   vertex_space = (GLuint *)batch_ptr;
   batch_ptr += dwords * 4;

   ADVANCE_BATCH();

   intel->batch.contains_geometry = 1;

   return vertex_space;
}
/* Wait until intelGetLastFrame() has caught up with the last dispatch
 * recorded in the SAREA.
 *
 * Without IRQs this polls, dropping the hardware lock (and optionally
 * sleeping) between polls.  With IRQs it either spins or waits on a
 * previously emitted IRQ, then keeps emitting IRQs for the next few
 * calls.  The lock/unlock pairs imply the caller holds the hardware
 * lock on entry; it is held again on return.
 */
static void intelWaitForFrameCompletion( intelContextPtr intel )
{
   drm_i915_sarea_t *sarea = (drm_i915_sarea_t *)intel->sarea;

   /* Polling path. */
   if (!intel->do_irqs) {
      while (intelGetLastFrame (intel) < sarea->last_dispatch) {
	 UNLOCK_HARDWARE( intel );
	 if (intel->do_usleeps)
	    DO_USLEEP( 1 );
	 LOCK_HARDWARE( intel );
      }
      return;
   }

   /* IRQ path: only wait if the frame has not completed yet. */
   if (intelGetLastFrame(intel) < sarea->last_dispatch) {
      if (intel->irqsEmitted) {
	 /* An IRQ was emitted earlier: sleep on it with the lock dropped. */
	 UNLOCK_HARDWARE( intel );
	 intelWaitIrq( intel, intel->alloc.irq_emitted );
	 LOCK_HARDWARE( intel );
      }
      else {
	 /* Nothing to sleep on: busy-wait. */
	 while (intelGetLastFrame (intel) < sarea->last_dispatch)
	    ;
      }

      /* Having had to wait, emit IRQs for the next ten calls. */
      intel->irqsEmitted = 10;
   }

   if (intel->irqsEmitted) {
      intelEmitIrqLocked( intel );
      intel->irqsEmitted--;
   }
}
/*
 * Copy the back buffer to the front buffer.
 *
 * dPriv  drawable being swapped; must have a bound context with driver
 *        private data (asserted below).
 * rect   optional sub-rectangle.  When non-NULL every cliprect is
 *        intersected with it.  When NULL the whole drawable is copied,
 *        the copy is preceded by a vblank wait and the swap statistics
 *        are updated afterwards.
 *
 * The blit direction follows the SAREA's pf_current_page: on page 0 the
 * destination is the front buffer and the source the back buffer,
 * otherwise the two are exchanged.
 */
void intelCopyBuffer( const __DRIdrawablePrivate *dPriv,
const drm_clip_rect_t *rect)
{
intelContextPtr intel;
/* missed_target and ust are only written and read on the !rect path. */
GLboolean missed_target;
int64_t ust;
/* Debug trace, compiled in but disabled. */
if (0)
fprintf(stderr, "%s\n", __FUNCTION__);
assert(dPriv);
assert(dPriv->driContextPriv);
assert(dPriv->driContextPriv->driverPrivate);
intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate;
/* Flush pending rendering, then take the lock and wait for the
 * previously dispatched frame.
 */
intelFlush( &intel->ctx );
LOCK_HARDWARE( intel );
intelWaitForFrameCompletion( intel );
/* Full swaps wait for vblank; the lock is dropped while waiting. */
if (!rect)
{
UNLOCK_HARDWARE( intel );
driWaitForVBlank( dPriv, &intel->vbl_seq, intel->vblank_flags, & missed_target );
LOCK_HARDWARE( intel );
}
{
const intelScreenPrivate *intelScreen = intel->intelScreen;
/* NOTE(review): this dPriv shadows the function parameter; from here
 * to the end of this scope the cliprects come from intel->driDrawable,
 * not from the drawable passed in.  Confirm this is intended.
 */
const __DRIdrawablePrivate *dPriv = intel->driDrawable;
const int nbox = dPriv->numClipRects;
const drm_clip_rect_t *pbox = dPriv->pClipRects;
drm_clip_rect_t box;
const int cpp = intelScreen->cpp;
const int pitch = intelScreen->front.pitch; /* in bytes */
int i;
GLuint CMD, BR13;
BATCH_LOCALS;
/* Build the blit command and BR13 for the screen depth: pitch in the
 * low bits, 0xCC in bits 16-23, bit 24 always set, bit 25 plus the
 * alpha/RGB write enables for 4 bytes per pixel.
 * NOTE(review): 0xCC is presumably the source-copy raster op and bits
 * 24/25 the colour depth - confirm against the hardware docs.
 */
switch(cpp) {
case 2:
BR13 = (pitch) | (0xCC << 16) | (1<<24);
CMD = XY_SRC_COPY_BLT_CMD;
break;
case 4:
BR13 = (pitch) | (0xCC << 16) | (1<<24) | (1<<25);
CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
XY_SRC_COPY_BLT_WRITE_RGB);
break;
default:
BR13 = (pitch) | (0xCC << 16) | (1<<24);
CMD = XY_SRC_COPY_BLT_CMD;
break;
}
/* Performance boxes are currently disabled. */
if (0)
intel_draw_performance_boxes( intel );
/* One blit per cliprect. */
for (i = 0 ; i < nbox; i++, pbox++)
{
/* Skip inverted cliprects and ones extending past the screen. */
if (pbox->x1 > pbox->x2 ||
pbox->y1 > pbox->y2 ||
pbox->x2 > intelScreen->width ||
pbox->y2 > intelScreen->height) {
_mesa_warning(&intel->ctx, "Bad cliprect in intelCopyBuffer()");
continue;
}
box = *pbox;
/* Intersect with the caller's rectangle; skip if nothing is left.
 * (Boxes of zero width or height are not skipped by this test.)
 */
if (rect)
{
if (rect->x1 > box.x1)
box.x1 = rect->x1;
if (rect->y1 > box.y1)
box.y1 = rect->y1;
if (rect->x2 < box.x2)
box.x2 = rect->x2;
if (rect->y2 < box.y2)
box.y2 = rect->y2;
if (box.x1 > box.x2 || box.y1 > box.y2)
continue;
}
/* Eight dwords: command, BR13, destination top-left, destination
 * bottom-right, destination offset, source top-left, source pitch
 * (low 16 bits of BR13), source offset.
 */
BEGIN_BATCH( 8);
OUT_BATCH( CMD );
OUT_BATCH( BR13 );
OUT_BATCH( (box.y1 << 16) | box.x1 );
OUT_BATCH( (box.y2 << 16) | box.x2 );
/* Destination buffer depends on the current page. */
if (intel->sarea->pf_current_page == 0)
OUT_BATCH( intelScreen->front.offset );
else
OUT_BATCH( intelScreen->back.offset );
OUT_BATCH( (box.y1 << 16) | box.x1 );
OUT_BATCH( BR13 & 0xffff );
/* Source is the other buffer. */
if (intel->sarea->pf_current_page == 0)
OUT_BATCH( intelScreen->back.offset );
else
OUT_BATCH( intelScreen->front.offset );
ADVANCE_BATCH();
}
}
/* Submit the blits while still holding the lock, then release it. */
intelFlushBatchLocked( intel, GL_TRUE, GL_TRUE, GL_TRUE );
UNLOCK_HARDWARE( intel );
/* Swap statistics are kept for full swaps only. */
if (!rect)
{
intel->swap_count++;
(*dri_interface->getUST)(&ust);
if (missed_target) {
intel->swap_missed_count++;
intel->swap_missed_ust = ust - intel->swap_ust;
}
intel->swap_ust = ust;
}
}
/* Emit a solid-colour fill blit of the rectangle (x, y)-(x+w, y+h).
 *
 * cpp         bytes per pixel; values other than 1..4 emit nothing
 * dst_pitch   destination pitch in bytes
 * dst_offset  offset of the destination buffer
 * color       fill colour, written to the batch unchanged
 */
void intelEmitFillBlitLocked( intelContextPtr intel,
			      GLuint cpp,
			      GLshort dst_pitch,  /* in bytes */
			      GLuint dst_offset,
			      GLshort x, GLshort y,
			      GLshort w, GLshort h,
			      GLuint color )
{
   GLuint br13, cmd;
   BATCH_LOCALS;

   /* Unsupported pixel sizes are ignored. */
   if (cpp < 1 || cpp > 4)
      return;

   /* Settings shared by every supported depth... */
   br13 = dst_pitch | (0xF0 << 16) | (1<<24);
   cmd = XY_COLOR_BLT_CMD;

   /* ...plus the extra bits used for four bytes per pixel. */
   if (cpp == 4) {
      br13 |= (1<<25);
      cmd |= XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
   }

   BEGIN_BATCH( 6);
   OUT_BATCH( cmd );
   OUT_BATCH( br13 );
   OUT_BATCH( (y << 16) | x );
   OUT_BATCH( ((y+h) << 16) | (x+w) );
   OUT_BATCH( dst_offset );
   OUT_BATCH( color );
   ADVANCE_BATCH();
}
/* Copy BitBlt
 *
 * Emit a source-copy blit of a w x h rectangle from (src_x, src_y) in
 * the source buffer to (dst_x, dst_y) in the destination buffer.
 *
 * cpp                   bytes per pixel; values other than 1..4 emit nothing
 * src_pitch, dst_pitch  multiplied by cpp before being handed to the
 *                       hardware, i.e. not given in bytes by the caller
 * src_offset, dst_offset  offsets of the two buffers
 *
 * Nothing is emitted when w or h is negative.
 *
 * NOTE(review): the scaled pitches are stored back into GLshort, so a
 * product above 32767 would not fit - confirm callers stay below that.
 */
void intelEmitCopyBlitLocked( intelContextPtr intel,
			      GLuint cpp,
			      GLshort src_pitch,
			      GLuint  src_offset,
			      GLshort dst_pitch,
			      GLuint  dst_offset,
			      GLshort src_x, GLshort src_y,
			      GLshort dst_x, GLshort dst_y,
			      GLshort w, GLshort h )
{
   GLuint CMD, BR13;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   BATCH_LOCALS;

   src_pitch *= cpp;
   dst_pitch *= cpp;

   switch(cpp) {
   case 1:
   case 2:
   case 3:
      BR13 = dst_pitch | (0xCC << 16) | (1<<24);
      CMD = XY_SRC_COPY_BLT_CMD;
      break;
   case 4:
      BR13 = dst_pitch | (0xCC << 16) | (1<<24) | (1<<25);
      CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
	     XY_SRC_COPY_BLT_WRITE_RGB);
      break;
   default:
      return;
   }

   /* Reject rectangles with a negative extent. */
   if (dst_y2 < dst_y ||
       dst_x2 < dst_x) {
      return;
   }

   /* Exactly eight dwords are written below, so reserve eight (this
    * used to ask for twelve, unlike every other emitter in this file,
    * where the reserved count matches the emitted count).
    */
   BEGIN_BATCH( 8);
   OUT_BATCH( CMD );
   OUT_BATCH( BR13 );
   OUT_BATCH( (dst_y << 16) | dst_x );
   OUT_BATCH( (dst_y2 << 16) | dst_x2 );
   OUT_BATCH( dst_offset );
   OUT_BATCH( (src_y << 16) | src_x );
   OUT_BATCH( src_pitch );
   OUT_BATCH( src_offset );
   ADVANCE_BATCH();
}
/* Clear the buffers selected in `flags` using colour-fill blits.
 *
 * ctx                 GL context
 * flags               BUFFER_BIT_* mask; the front-left, back-left,
 *                     depth and stencil bits are acted on here
 * all                 when true each cliprect is cleared in full,
 *                     otherwise it is first intersected with the
 *                     region below
 * cx1, cy1, cw, ch    clear region; cy1 is measured from the bottom of
 *                     the drawable (it is flipped below)
 *
 * Takes the hardware lock for the duration and flushes the batch
 * before releasing it.
 */
void intelClearWithBlit(GLcontext *ctx, GLbitfield flags, GLboolean all,
GLint cx1, GLint cy1, GLint cw, GLint ch)
{
intelContextPtr intel = INTEL_CONTEXT( ctx );
intelScreenPrivate *intelScreen = intel->intelScreen;
GLuint clear_depth, clear_color;
GLint cx, cy;
GLint pitch;
GLint cpp = intelScreen->cpp;
GLint i;
GLuint BR13, CMD, D_CMD;
BATCH_LOCALS;
intelFlush( &intel->ctx );
LOCK_HARDWARE( intel );
/* The front buffer's pitch is used for all three blits below.
 * NOTE(review): this assumes back and depth share that pitch - confirm.
 */
pitch = intelScreen->front.pitch;
clear_color = intel->ClearColor;
/* Depth value is the GL clear depth scaled by intel->ClearDepth; the
 * stencil clear value goes into the top byte.
 */
clear_depth = 0;
if (flags & BUFFER_BIT_DEPTH) {
clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth);
}
if (flags & BUFFER_BIT_STENCIL) {
clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
}
/* CMD is used for the colour buffers, D_CMD for depth/stencil.  At
 * 4 bytes per pixel D_CMD enables RGB writes only when depth is being
 * cleared and alpha writes only when stencil is being cleared, so each
 * can be cleared without touching the other.
 */
switch(cpp) {
case 2:
BR13 = (0xF0 << 16) | (pitch) | (1<<24);
D_CMD = CMD = XY_COLOR_BLT_CMD;
break;
case 4:
BR13 = (0xF0 << 16) | (pitch) | (1<<24) | (1<<25);
CMD = (XY_COLOR_BLT_CMD |
XY_COLOR_BLT_WRITE_ALPHA |
XY_COLOR_BLT_WRITE_RGB);
D_CMD = XY_COLOR_BLT_CMD;
if (flags & BUFFER_BIT_DEPTH) D_CMD |= XY_COLOR_BLT_WRITE_RGB;
if (flags & BUFFER_BIT_STENCIL) D_CMD |= XY_COLOR_BLT_WRITE_ALPHA;
break;
default:
BR13 = (0xF0 << 16) | (pitch) | (1<<24);
D_CMD = CMD = XY_COLOR_BLT_CMD;
break;
}
{
/* flip top to bottom */
cy = intel->driDrawable->h-cy1-ch;
/* translate the region by the drawable origin */
cx = cx1 + intel->drawX;
cy += intel->drawY;
/* adjust for page flipping: on page 1 the front and back bits are
 * exchanged
 */
if ( intel->sarea->pf_current_page == 1 ) {
GLuint tmp = flags;
flags &= ~(BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT);
if ( tmp & BUFFER_BIT_FRONT_LEFT ) flags |= BUFFER_BIT_BACK_LEFT;
if ( tmp & BUFFER_BIT_BACK_LEFT ) flags |= BUFFER_BIT_FRONT_LEFT;
}
for (i = 0 ; i < intel->numClipRects ; i++)
{
drm_clip_rect_t *box = &intel->pClipRects[i];
drm_clip_rect_t b;
if (!all) {
/* Intersect the cliprect with the clear region; skip it when
 * the intersection is empty.
 */
GLint x = box->x1;
GLint y = box->y1;
GLint w = box->x2 - x;
GLint h = box->y2 - y;
if (x < cx) w -= cx - x, x = cx;
if (y < cy) h -= cy - y, y = cy;
if (x + w > cx + cw) w = cx + cw - x;
if (y + h > cy + ch) h = cy + ch - y;
if (w <= 0) continue;
if (h <= 0) continue;
b.x1 = x;
b.y1 = y;
b.x2 = x + w;
b.y2 = y + h;
} else {
b = *box;
}
/* Skip inverted boxes and boxes extending past the screen. */
if (b.x1 > b.x2 ||
b.y1 > b.y2 ||
b.x2 > intelScreen->width ||
b.y2 > intelScreen->height)
continue;
/* Each fill is six dwords: command, BR13, top-left, bottom-right,
 * buffer offset, fill value.
 */
if ( flags & BUFFER_BIT_FRONT_LEFT ) {
BEGIN_BATCH( 6);
OUT_BATCH( CMD );
OUT_BATCH( BR13 );
OUT_BATCH( (b.y1 << 16) | b.x1 );
OUT_BATCH( (b.y2 << 16) | b.x2 );
OUT_BATCH( intelScreen->front.offset );
OUT_BATCH( clear_color );
ADVANCE_BATCH();
}
if ( flags & BUFFER_BIT_BACK_LEFT ) {
BEGIN_BATCH( 6);
OUT_BATCH( CMD );
OUT_BATCH( BR13 );
OUT_BATCH( (b.y1 << 16) | b.x1 );
OUT_BATCH( (b.y2 << 16) | b.x2 );
OUT_BATCH( intelScreen->back.offset );
OUT_BATCH( clear_color );
ADVANCE_BATCH();
}
/* Depth and stencil are cleared with a single blit of the depth
 * buffer using D_CMD and the combined clear_depth value.
 */
if ( flags & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) {
BEGIN_BATCH( 6);
OUT_BATCH( D_CMD );
OUT_BATCH( BR13 );
OUT_BATCH( (b.y1 << 16) | b.x1 );
OUT_BATCH( (b.y2 << 16) | b.x2 );
OUT_BATCH( intelScreen->depth.offset );
OUT_BATCH( clear_depth );
ADVANCE_BATCH();
}
}
}
/* Submit the blits while still holding the lock, then release it. */
intelFlushBatchLocked( intel, GL_TRUE, GL_FALSE, GL_TRUE );
UNLOCK_HARDWARE( intel );
}
/* Release the batch buffer storage and zero intel->batch.
 *
 * A non-zero alloc.offset marks storage obtained from
 * intelAllocateAGP(); otherwise the storage, if any, came from malloc().
 */
void intelDestroyBatchBuffer( GLcontext *ctx )
{
   intelContextPtr intel = INTEL_CONTEXT(ctx);

   if (intel->alloc.offset != 0) {
      /* AGP-backed buffer. */
      intelFreeAGP( intel, intel->alloc.ptr );
      intel->alloc.offset = 0;
   }
   else {
      /* Local buffer; free() accepts a null pointer. */
      free(intel->alloc.ptr);
   }
   intel->alloc.ptr = NULL;

   memset(&intel->batch, 0, sizeof(intel->batch));
}
/* Allocate the storage used for batch buffers.
 *
 * AGP memory is tried only when the INTEL_BATCH environment variable is
 * set and the screen allows batch buffers.  In every other case, and
 * when the AGP allocation fails, an 8 KiB malloc()ed buffer with
 * alloc.offset == 0 is used instead.
 */
void intelInitBatchBuffer( GLcontext *ctx )
{
   intelContextPtr intel = INTEL_CONTEXT(ctx);

   /* This path isn't really safe with rotate:
    */
   if (getenv("INTEL_BATCH") && intel->intelScreen->allow_batchbuffer) {
      if (intel->intelScreen->deviceID == PCI_CHIP_I865_G) {
	 /* HW bug?  Seems to crash if batchbuffer crosses 4k boundary.
	  */
	 intel->alloc.size = 8 * 1024;
      }
      else {
	 /* This is the smallest amount of memory the kernel deals with.
	  * We'd ideally like to make this smaller.
	  */
	 intel->alloc.size = 1 << intel->intelScreen->logTextureGranularity;
      }

      intel->alloc.ptr = intelAllocateAGP( intel, intel->alloc.size );

      if (!intel->alloc.ptr)
	 intel->alloc.offset = 0; /* OK? */
      else
	 intel->alloc.offset =
	    intelAgpOffsetFromVirtual( intel, intel->alloc.ptr );
   }

   /* The default is now to use a local buffer and pass that to the
    * kernel.  This is also a fallback if allocation fails on the
    * above path:
    */
   if (intel->alloc.ptr == NULL) {
      intel->alloc.size = 8 * 1024;
      intel->alloc.ptr = malloc( intel->alloc.size );
      intel->alloc.offset = 0;
   }

   /* Allocation failure is only caught in builds with asserts enabled. */
   assert(intel->alloc.ptr);
}