Invalidate the current fastpath on changes to attribute size or offset within
the vertex. Use existing facilities to check for SSE2 and enable it when available. Turn on SSE/SSE2 codegen for t_vertex.c by default when USE_SSE_ASM is defined. Disable it by setting the MESA_NO_CODEGEN environment variable (e.g. "MESA_NO_CODEGEN=t").
This commit is contained in:
@@ -570,7 +570,9 @@ struct tnl_clipspace_fastpath {
|
||||
|
||||
struct {
|
||||
GLuint format;
|
||||
GLuint size;
|
||||
GLuint stride;
|
||||
GLuint offset;
|
||||
} *attr;
|
||||
|
||||
tnl_emit_func func;
|
||||
|
@@ -46,7 +46,9 @@ static GLboolean match_fastpath( struct tnl_clipspace *vtx,
|
||||
return GL_FALSE;
|
||||
|
||||
for (j = 0; j < vtx->attr_count; j++)
|
||||
if (vtx->attr[j].format != fp->attr[j].format)
|
||||
if (vtx->attr[j].format != fp->attr[j].format ||
|
||||
vtx->attr[j].inputsize != fp->attr[j].size ||
|
||||
vtx->attr[j].vertoffset != fp->attr[j].offset)
|
||||
return GL_FALSE;
|
||||
|
||||
if (fp->match_strides) {
|
||||
@@ -90,6 +92,8 @@ void _tnl_register_fastpath( struct tnl_clipspace *vtx,
|
||||
for (i = 0; i < vtx->attr_count; i++) {
|
||||
fastpath->attr[i].format = vtx->attr[i].format;
|
||||
fastpath->attr[i].stride = vtx->attr[i].inputstride;
|
||||
fastpath->attr[i].size = vtx->attr[i].inputsize;
|
||||
fastpath->attr[i].offset = vtx->attr[i].vertoffset;
|
||||
}
|
||||
|
||||
fastpath->next = vtx->fastpath;
|
||||
@@ -470,8 +474,8 @@ void _tnl_init_vertices( GLcontext *ctx,
|
||||
|
||||
vtx->codegen_emit = NULL;
|
||||
|
||||
#ifdef __i386__
|
||||
if (getenv("MESA_EXPERIMENTAL"))
|
||||
#ifdef USE_SSE_ASM
|
||||
if (!_mesa_getenv("MESA_NO_CODEGEN"))
|
||||
vtx->codegen_emit = _tnl_generate_sse_emit;
|
||||
#endif
|
||||
}
|
||||
|
@@ -33,12 +33,14 @@
|
||||
#include "simple_list.h"
|
||||
#include "enums.h"
|
||||
|
||||
#if defined(USE_X86_ASM)
|
||||
|
||||
#define X 0
|
||||
#define Y 1
|
||||
#define Z 2
|
||||
#define W 3
|
||||
|
||||
#define DISASSEM 1
|
||||
#define DISASSEM 0
|
||||
|
||||
struct x86_reg {
|
||||
GLuint file:3;
|
||||
@@ -1208,18 +1210,26 @@ static GLboolean build_vertex_emit( struct x86_program *p )
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
#include "x86/common_x86_asm.h"
|
||||
|
||||
|
||||
void _tnl_generate_sse_emit( GLcontext *ctx )
|
||||
{
|
||||
struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
|
||||
struct x86_program p;
|
||||
|
||||
if (!cpu_has_xmm) {
|
||||
vtx->codegen_emit = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&p, 0, sizeof(p));
|
||||
p.ctx = ctx;
|
||||
p.store = MALLOC(1024);
|
||||
|
||||
p.inputs_safe = 1; /* for now */
|
||||
p.inputs_safe = 0; /* for now */
|
||||
p.outputs_safe = 1; /* for now */
|
||||
p.have_sse2 = 1; /* testing */
|
||||
p.have_sse2 = cpu_has_xmm2;
|
||||
p.identity = make_reg(file_XMM, 6);
|
||||
p.chan0 = make_reg(file_XMM, 7);
|
||||
|
||||
@@ -1246,3 +1256,12 @@ void _tnl_generate_sse_emit( GLcontext *ctx )
|
||||
(void)sse2_packsswb;
|
||||
(void)sse2_pshufd;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void _tnl_generate_sse_emit( GLcontext *ctx )
|
||||
{
|
||||
/* Dummy version for when USE_SSE_ASM is not defined.
 *
 * Intentionally empty: this stub sits in the #else branch so that
 * _tnl_generate_sse_emit() is still defined when the code-generating
 * implementation is compiled out. It ignores ctx and does nothing.
 */
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user