codegen'ed versions of the 2nd level dispatch

This commit is contained in:
Daniel Borca
2004-04-01 06:53:22 +00:00
parent 13822537a7
commit e5c7f44009
5 changed files with 101 additions and 12 deletions

View File

@@ -901,6 +901,7 @@ void _tnl_vtx_init( GLcontext *ctx )
_tnl_current_init( ctx ); _tnl_current_init( ctx );
_tnl_exec_vtxfmt_init( ctx ); _tnl_exec_vtxfmt_init( ctx );
_tnl_generic_exec_vtxfmt_init( ctx ); _tnl_generic_exec_vtxfmt_init( ctx );
_tnl_x86_exec_vtxfmt_init( ctx ); /* [dBorca] x86 DISPATCH_ATTRFV */
_mesa_install_exec_vtxfmt( ctx, &tnl->exec_vtxfmt ); _mesa_install_exec_vtxfmt( ctx, &tnl->exec_vtxfmt );

View File

@@ -78,6 +78,8 @@ extern void _tnl_generic_attr_table_init( attrfv_func (*tab)[4] );
*/ */
extern void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen ); extern void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen );
extern void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx );

View File

@@ -409,8 +409,7 @@ static void GLAPIENTRY _tnl_VertexAttrib4fvNV( GLuint index,
/* Install the generic versions of the 2nd level dispatch functions. /* Install the generic versions of the 2nd level dispatch functions.
* There's currently no codegen alternative to these, though one is in * [dBorca] Some of these have a codegen alternative.
* the works.
*/ */
void _tnl_generic_exec_vtxfmt_init( GLcontext *ctx ) void _tnl_generic_exec_vtxfmt_init( GLcontext *ctx )
{ {

View File

@@ -59,8 +59,6 @@ EXTERN( _x86_Vertex2fv );
EXTERN( _x86_Vertex3fv ); EXTERN( _x86_Vertex3fv );
EXTERN( _x86_Vertex4fv ); EXTERN( _x86_Vertex4fv );
/* None of these used yet:
*/
EXTERN( _x86_dispatch_attrf ); EXTERN( _x86_dispatch_attrf );
EXTERN( _x86_dispatch_attrfv ); EXTERN( _x86_dispatch_attrfv );
EXTERN( _x86_dispatch_multitexcoordf ); EXTERN( _x86_dispatch_multitexcoordf );
@@ -260,6 +258,77 @@ void _do_choose( void )
{ {
} }
/* [dBorca] The dispatch kinds are deliberately kept as separate macros
* rather than one, since each kind might need to be handled in a different
* way. Oh well, once things get much clearer, they could be collapsed...
*/
/* Build a private copy of the _x86_dispatch_attr<TYPE> stub and install it
 * as vfmt->FUNC<SIZE><TYPE>.
 *
 *   FUNC, SIZE, TYPE  pasted together to name the vfmt slot
 *                     (e.g. Color,3,fv -> Color3fv); TYPE also selects the
 *                     stub that is copied (the bytes between
 *                     _x86_dispatch_attr<TYPE> and its _end label).
 *   ATTR, SIZE        select tabfv[ATTR][SIZE-1]; the address of that table
 *                     entry is handed to FIXUP to be patched into the copy.
 *
 * `ctx` and `vfmt` must be in scope where the macro is expanded.
 * `offset` is declared for FIXUP's benefit -- NOTE(review): presumably FIXUP
 * uses it by name as its cursor into the copied code; confirm against the
 * FIXUP definition before touching it.
 *
 * If ALIGN_MALLOC yields NULL the vfmt slot is left exactly as it was,
 * instead of copying into a null pointer (assumes ALIGN_MALLOC reports
 * failure with NULL -- TODO confirm).
 */
#define MAKE_DISPATCH_ATTR(FUNC, SIZE, TYPE, ATTR)                          \
do {                                                                        \
   char *start = (char *)&_x86_dispatch_attr##TYPE;                         \
   char *end = (char *)&_x86_dispatch_attr##TYPE##_end;                     \
   char *code = ALIGN_MALLOC( end - start, 16 );                            \
   int offset = 0;                                                          \
   if (code) {                                                              \
      memcpy (code, start, end - start);                                    \
      FIXUP(code, 0, 0,                                                     \
            (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[ATTR][SIZE-1]));             \
      vfmt->FUNC##SIZE##TYPE = code;                                        \
   }                                                                        \
} while (0)
/* Build a private copy of the _x86_dispatch_multitexcoord<TYPE> stub and
 * install it as vfmt->FUNC<SIZE><TYPE>ARB.
 *
 *   FUNC, SIZE, TYPE  pasted together (plus the ARB suffix) to name the
 *                     vfmt slot, e.g. MultiTexCoord,2,fv ->
 *                     MultiTexCoord2fvARB; TYPE also selects the stub that
 *                     is copied.
 *   ATTR              accepted for symmetry with the other MAKE_DISPATCH_*
 *                     macros but not used: the address patched in is always
 *                     that of tabfv[_TNL_ATTRIB_TEX0][SIZE-1].
 *                     NOTE(review): presumably the stub indexes from that
 *                     base by texture unit at run time -- confirm against
 *                     the assembly.
 *
 * `ctx` and `vfmt` must be in scope where the macro is expanded.
 * `offset` is declared for FIXUP's benefit -- NOTE(review): presumably FIXUP
 * uses it by name; confirm against the FIXUP definition.
 *
 * If ALIGN_MALLOC yields NULL the vfmt slot is left exactly as it was,
 * instead of copying into a null pointer (assumes ALIGN_MALLOC reports
 * failure with NULL -- TODO confirm).
 */
#define MAKE_DISPATCH_MULTITEXCOORD(FUNC, SIZE, TYPE, ATTR)                 \
do {                                                                        \
   char *start = (char *)&_x86_dispatch_multitexcoord##TYPE;                \
   char *end = (char *)&_x86_dispatch_multitexcoord##TYPE##_end;            \
   char *code = ALIGN_MALLOC( end - start, 16 );                            \
   int offset = 0;                                                          \
   if (code) {                                                              \
      memcpy (code, start, end - start);                                    \
      FIXUP(code, 0, 0,                                                     \
            (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[_TNL_ATTRIB_TEX0][SIZE-1])); \
      vfmt->FUNC##SIZE##TYPE##ARB = code;                                   \
   }                                                                        \
} while (0)
/* Build a private copy of the _x86_dispatch_vertexattrib<TYPE> stub and
 * install it as vfmt->FUNC<SIZE><TYPE>NV.
 *
 *   FUNC, SIZE, TYPE  pasted together (plus the NV suffix) to name the vfmt
 *                     slot, e.g. VertexAttrib,2,fv -> VertexAttrib2fvNV;
 *                     TYPE also selects the stub that is copied.
 *   ATTR              accepted for symmetry with the other MAKE_DISPATCH_*
 *                     macros but not used: the address patched in is always
 *                     that of tabfv[0][SIZE-1].
 *                     NOTE(review): presumably the stub adds the scaled
 *                     attribute index to that base at run time -- confirm
 *                     against the assembly.
 *
 * `ctx` and `vfmt` must be in scope where the macro is expanded.
 * `offset` is declared for FIXUP's benefit -- NOTE(review): presumably FIXUP
 * uses it by name; confirm against the FIXUP definition.
 *
 * If ALIGN_MALLOC yields NULL the vfmt slot is left exactly as it was,
 * instead of copying into a null pointer (assumes ALIGN_MALLOC reports
 * failure with NULL -- TODO confirm).
 */
#define MAKE_DISPATCH_VERTEXATTRIB(FUNC, SIZE, TYPE, ATTR)                  \
do {                                                                        \
   char *start = (char *)&_x86_dispatch_vertexattrib##TYPE;                 \
   char *end = (char *)&_x86_dispatch_vertexattrib##TYPE##_end;             \
   char *code = ALIGN_MALLOC( end - start, 16 );                            \
   int offset = 0;                                                          \
   if (code) {                                                              \
      memcpy (code, start, end - start);                                    \
      FIXUP(code, 0, 0,                                                     \
            (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[0][SIZE-1]));                \
      vfmt->FUNC##SIZE##TYPE##NV = code;                                    \
   }                                                                        \
} while (0)
/* [dBorca] Install the codegen'ed versions of the 2nd level dispatch
* functions into the context's exec_vtxfmt table.
*
* Each MAKE_DISPATCH_* line below ALIGN_MALLOCs a copy of an x86 stub,
* patches it with FIXUP and stores the copy in the matching vfmt entry.
* The macros pick up `ctx` and `vfmt` by name, so those two identifiers
* must keep their names in this function.
*
* Nothing here records or releases the copies. We should keep a list and
* free them in the end...
*/
void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx )
{
/* Destination table; referenced by name inside the MAKE_DISPATCH_* macros. */
GLvertexformat *vfmt = &(TNL_CONTEXT(ctx)->exec_vtxfmt);
/* Fixed-attribute entry points: (name, size, f|fv variant, tabfv row). */
MAKE_DISPATCH_ATTR(Color,3,f, _TNL_ATTRIB_COLOR0);
MAKE_DISPATCH_ATTR(Color,3,fv, _TNL_ATTRIB_COLOR0);
MAKE_DISPATCH_ATTR(Color,4,f, _TNL_ATTRIB_COLOR0);
MAKE_DISPATCH_ATTR(Color,4,fv, _TNL_ATTRIB_COLOR0);
MAKE_DISPATCH_ATTR(Normal,3,f, _TNL_ATTRIB_NORMAL);
MAKE_DISPATCH_ATTR(Normal,3,fv, _TNL_ATTRIB_NORMAL);
MAKE_DISPATCH_ATTR(TexCoord,2,f, _TNL_ATTRIB_TEX0);
MAKE_DISPATCH_ATTR(TexCoord,2,fv, _TNL_ATTRIB_TEX0);
MAKE_DISPATCH_ATTR(Vertex,3,f, _TNL_ATTRIB_POS);
MAKE_DISPATCH_ATTR(Vertex,3,fv, _TNL_ATTRIB_POS);
/* just add more */
/* MultiTexCoord*ARB entry points; the last macro argument is ignored. */
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,f, 0);
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,fv, 0);
/* just add more */
/* VertexAttrib*NV entry points; the last macro argument is ignored. */
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,f, 0);
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,fv, 0);
/* just add more */
}
#else #else
void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen ) void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen )

View File

@@ -31,11 +31,25 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#if defined(USE_X86_ASM) && !defined(HAVE_NONSTANDARD_GLAPIENTRY) #if defined(USE_X86_ASM) && !defined(HAVE_NONSTANDARD_GLAPIENTRY)
#if !defined (__DJGPP__)
#define GLOBL( x ) \ #define GLOBL( x ) \
.globl x; \ .globl x; \
x: x:
#define EXTRN( x ) x
#else /* defined(__DJGPP__) */
#define GLOBL( x ) \
.globl _##x; \
_##x:
#define EXTRN( x ) _##x
#endif /* defined(__DJGPP__) */
.data .data
.align 4 .align 4
@@ -48,6 +62,10 @@ x:
// [dBorca] TODO
// Unfold functions for each vertex size?
// Build super-specialized MMX/SSE versions?
GLOBL ( _x86_Vertex1fv ) GLOBL ( _x86_Vertex1fv )
movl 4(%esp), %ecx movl 4(%esp), %ecx
push %edi push %edi
@@ -217,9 +235,9 @@ GLOBL( _x86_choose_fv)
subl $12, %esp // gcc does 16 byte alignment of stack frames? subl $12, %esp // gcc does 16 byte alignment of stack frames?
movl $SUBST(0), (%esp) // arg 0 - attrib movl $SUBST(0), (%esp) // arg 0 - attrib
movl $SUBST(1), 4(%esp) // arg 1 - N movl $SUBST(1), 4(%esp) // arg 1 - N
call _do_choose // new function returned in %eax call EXTRN(_do_choose) // new function returned in %eax
add $12, %esp // tear down stack frame add $12, %esp // tear down stack frame
jmp *%eax // jump to new func jmp *%eax // jump to new func
GLOBL ( _x86_choosefv_end ) GLOBL ( _x86_choosefv_end )
@@ -251,7 +269,7 @@ GLOBL( _x86_dispatch_attrf )
subl $12, %esp // gcc does 16 byte alignment of stack frames? subl $12, %esp // gcc does 16 byte alignment of stack frames?
leal 16(%esp), %edx // address of first float on stack leal 16(%esp), %edx // address of first float on stack
movl %edx, (%esp) // save as 'v' movl %edx, (%esp) // save as 'v'
call SUBST(0) // 0x0 --> tabfv[attr][n] call *SUBST(0) // 0x0 --> tabfv[attr][n]
addl $12, %esp // tear down frame addl $12, %esp // tear down frame
ret // return ret // return
GLOBL( _x86_dispatch_attrf_end ) GLOBL( _x86_dispatch_attrf_end )
@@ -259,7 +277,7 @@ GLOBL( _x86_dispatch_attrf_end )
// The fv case is simpler: // The fv case is simpler:
// //
GLOBL( _x86_dispatch_attrfv ) GLOBL( _x86_dispatch_attrfv )
jmp SUBST(0) // 0x0 --> tabfv[attr][n] jmp *SUBST(0) // 0x0 --> tabfv[attr][n]
GLOBL( _x86_dispatch_attrfv_end ) GLOBL( _x86_dispatch_attrfv_end )
@@ -294,7 +312,7 @@ GLOBL( _x86_dispatch_vertexattribf )
movl $16, %ecx movl $16, %ecx
movl 4(%esp), %eax movl 4(%esp), %eax
cmpl $16, %eax cmpl $16, %eax
cmovge %ecx, %eax cmovge %ecx, %eax // [dBorca] BADBAD! might not be supported
leal 8(%esp), %ecx // calculate 'v' leal 8(%esp), %ecx // calculate 'v'
movl %ecx, 4(%esp) // save in 1st arg slot movl %ecx, 4(%esp) // save in 1st arg slot
sall $4, %eax sall $4, %eax
@@ -305,7 +323,7 @@ GLOBL( _x86_dispatch_vertexattribfv )
movl $16, %ecx movl $16, %ecx
movl 4(%esp), %eax movl 4(%esp), %eax
cmpl $16, %eax cmpl $16, %eax
cmovge %ecx, %eax cmovge %ecx, %eax // [dBorca] BADBAD! might not be supported
movl 8(%esp), %ecx // load 'v' movl 8(%esp), %ecx // load 'v'
movl %ecx, 4(%esp) // save in 1st arg slot movl %ecx, 4(%esp) // save in 1st arg slot
sall $4, %eax sall $4, %eax