Invalidate the current fastpath on changes to attribute size or offset within the vertex.

Use existing facilities to check for SSE2 and enable it when available.

Turn on SSE/SSE2 codegen for t_vertex.c by default when USE_SSE_ASM is
defined.  Disable with "MESA_NO_CODEGEN=t".
This commit is contained in:
Keith Whitwell
2005-05-19 20:25:32 +00:00
parent c2745ffa49
commit 18a74321aa
3 changed files with 32 additions and 7 deletions

View File

@@ -570,7 +570,9 @@ struct tnl_clipspace_fastpath {
struct {
GLuint format;
GLuint size;
GLuint stride;
GLuint offset;
} *attr;
tnl_emit_func func;

View File

@@ -46,7 +46,9 @@ static GLboolean match_fastpath( struct tnl_clipspace *vtx,
return GL_FALSE;
for (j = 0; j < vtx->attr_count; j++)
if (vtx->attr[j].format != fp->attr[j].format)
if (vtx->attr[j].format != fp->attr[j].format ||
vtx->attr[j].inputsize != fp->attr[j].size ||
vtx->attr[j].vertoffset != fp->attr[j].offset)
return GL_FALSE;
if (fp->match_strides) {
@@ -90,6 +92,8 @@ void _tnl_register_fastpath( struct tnl_clipspace *vtx,
for (i = 0; i < vtx->attr_count; i++) {
fastpath->attr[i].format = vtx->attr[i].format;
fastpath->attr[i].stride = vtx->attr[i].inputstride;
fastpath->attr[i].size = vtx->attr[i].inputsize;
fastpath->attr[i].offset = vtx->attr[i].vertoffset;
}
fastpath->next = vtx->fastpath;
@@ -470,8 +474,8 @@ void _tnl_init_vertices( GLcontext *ctx,
vtx->codegen_emit = NULL;
#ifdef __i386__
if (getenv("MESA_EXPERIMENTAL"))
#ifdef USE_SSE_ASM
if (!_mesa_getenv("MESA_NO_CODEGEN"))
vtx->codegen_emit = _tnl_generate_sse_emit;
#endif
}

View File

@@ -33,12 +33,14 @@
#include "simple_list.h"
#include "enums.h"
#if defined(USE_X86_ASM)
#define X 0
#define Y 1
#define Z 2
#define W 3
#define DISASSEM 1
#define DISASSEM 0
struct x86_reg {
GLuint file:3;
@@ -1208,18 +1210,26 @@ static GLboolean build_vertex_emit( struct x86_program *p )
return GL_TRUE;
}
#include "x86/common_x86_asm.h"
void _tnl_generate_sse_emit( GLcontext *ctx )
{
struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
struct x86_program p;
if (!cpu_has_xmm) {
vtx->codegen_emit = NULL;
return;
}
memset(&p, 0, sizeof(p));
p.ctx = ctx;
p.store = MALLOC(1024);
p.inputs_safe = 1; /* for now */
p.inputs_safe = 0; /* for now */
p.outputs_safe = 1; /* for now */
p.have_sse2 = 1; /* testing */
p.have_sse2 = cpu_has_xmm2;
p.identity = make_reg(file_XMM, 6);
p.chan0 = make_reg(file_XMM, 7);
@@ -1246,3 +1256,12 @@ void _tnl_generate_sse_emit( GLcontext *ctx )
(void)sse2_packsswb;
(void)sse2_pshufd;
}
#else
/**
 * Fallback stub for _tnl_generate_sse_emit().
 *
 * Compiled only on the #else side of the enclosing preprocessor
 * conditional.  It does nothing at all.
 *
 * NOTE(review): the previous comment said this is used when USE_SSE_ASM
 * is not defined; confirm that this matches the macro actually tested by
 * the enclosing #if.
 *
 * \param ctx  GL context; unused by this stub.
 */
void _tnl_generate_sse_emit( GLcontext *ctx )
{
   (void) ctx;
}
#endif