Revert "Redoing the way we handle vertex shaders for the draw module."

This reverts commit 6dcfddb8e2.
This commit is contained in:
Zack Rusin
2007-09-28 12:28:16 -04:00
parent f2a33a63f1
commit 901577e07f
17 changed files with 141 additions and 217 deletions

View File

@@ -155,6 +155,14 @@ void draw_set_viewport_state( struct draw_context *draw,
} }
void
draw_set_vertex_shader(struct draw_context *draw,
const struct pipe_shader_state *shader)
{
draw_flush( draw );
draw->vertex_shader = *shader;
}
void void
draw_set_vertex_buffer(struct draw_context *draw, draw_set_vertex_buffer(struct draw_context *draw,

View File

@@ -92,12 +92,9 @@ void draw_set_rasterize_stage( struct draw_context *draw,
struct draw_stage *stage ); struct draw_stage *stage );
void * draw_create_vertex_shader(struct draw_context *draw, void
const struct pipe_shader_state *shader); draw_set_vertex_shader(struct draw_context *draw,
void draw_bind_vertex_shader(struct draw_context *draw, const struct pipe_shader_state *shader);
void *vcso);
void draw_delete_vertex_shader(struct draw_context *draw,
void *vcso);
void void

View File

@@ -46,7 +46,6 @@
#include "draw_vertex.h" #include "draw_vertex.h"
#include "x86/rtasm/x86sse.h"
/** /**
* Basic vertex info. * Basic vertex info.
@@ -117,15 +116,6 @@ struct draw_stage
#define VCACHE_OVERFLOW 4 #define VCACHE_OVERFLOW 4
#define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */ #define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */
/**
* Private version of the compiled vertex_shader
*/
struct draw_vertex_shader {
const struct pipe_shader_state *state;
#if defined(__i386__) || defined(__386__)
struct x86_function sse2_program;
#endif
};
/** /**
* Private context for the drawing module. * Private context for the drawing module.
@@ -155,7 +145,7 @@ struct draw_context
struct pipe_viewport_state viewport; struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX];
struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX];
const struct draw_vertex_shader *vertex_shader; struct pipe_shader_state vertex_shader;
struct pipe_vertex_buffer feedback_buffer[PIPE_ATTRIB_MAX]; struct pipe_vertex_buffer feedback_buffer[PIPE_ATTRIB_MAX];
struct pipe_vertex_element feedback_element[PIPE_ATTRIB_MAX]; struct pipe_vertex_element feedback_element[PIPE_ATTRIB_MAX];

View File

@@ -85,7 +85,7 @@ void draw_vertex_fetch( struct draw_context *draw,
/*printf("fetch vertex %u: \n", j);*/ /*printf("fetch vertex %u: \n", j);*/
/* loop over vertex attributes (vertex shader inputs) */ /* loop over vertex attributes (vertex shader inputs) */
for (attr = 0; attr < draw->vertex_shader->state->num_inputs; attr++) { for (attr = 0; attr < draw->vertex_shader.num_inputs; attr++) {
unsigned buf = draw->vertex_element[attr].vertex_buffer_index; unsigned buf = draw->vertex_element[attr].vertex_buffer_index;
const void *src const void *src

View File

@@ -36,8 +36,6 @@
#include "draw_context.h" #include "draw_context.h"
#include "draw_vertex.h" #include "draw_vertex.h"
#include "x86/rtasm/x86sse.h"
#include "pipe/tgsi/exec/tgsi_core.h" #include "pipe/tgsi/exec/tgsi_core.h"
static INLINE unsigned static INLINE unsigned
@@ -72,7 +70,6 @@ typedef void (XSTDCALL *codegen_function) (
float (*constant)[4], float (*constant)[4],
struct tgsi_exec_vector *temporary ); struct tgsi_exec_vector *temporary );
/** /**
* Transform vertices with the current vertex program/shader * Transform vertices with the current vertex program/shader
* Up to four vertices can be shaded at a time. * Up to four vertices can be shaded at a time.
@@ -95,7 +92,7 @@ run_vertex_program(struct draw_context *draw,
const float *trans = draw->viewport.translate; const float *trans = draw->viewport.translate;
assert(count <= 4); assert(count <= 4);
assert(draw->vertex_shader->state->output_semantic_name[0] assert(draw->vertex_shader.output_semantic_name[0]
== TGSI_SEMANTIC_POSITION); == TGSI_SEMANTIC_POSITION);
#ifdef DEBUG #ifdef DEBUG
@@ -104,7 +101,7 @@ run_vertex_program(struct draw_context *draw,
/* init machine state */ /* init machine state */
tgsi_exec_machine_init(&machine, tgsi_exec_machine_init(&machine,
draw->vertex_shader->state->tokens, draw->vertex_shader.tokens,
PIPE_MAX_SAMPLERS, PIPE_MAX_SAMPLERS,
NULL /*samplers*/ ); NULL /*samplers*/ );
@@ -117,8 +114,8 @@ run_vertex_program(struct draw_context *draw,
draw_vertex_fetch( draw, &machine, elts, count ); draw_vertex_fetch( draw, &machine, elts, count );
/* run shader */ /* run shader */
if( draw->vertex_shader->state->executable != NULL ) { if( draw->vertex_shader.executable != NULL ) {
codegen_function func = (codegen_function) draw->vertex_shader->state->executable; codegen_function func = (codegen_function) draw->vertex_shader.executable;
func( func(
machine.Inputs, machine.Inputs,
machine.Outputs, machine.Outputs,
@@ -209,42 +206,3 @@ void draw_vertex_shader_queue_flush( struct draw_context *draw )
draw->vs.queue_nr = 0; draw->vs.queue_nr = 0;
} }
void *
draw_create_vertex_shader(struct draw_context *draw,
const struct pipe_shader_state *shader)
{
struct draw_vertex_shader *vs = calloc(1, sizeof(struct draw_vertex_shader));
vs->state = shader;
#if defined(__i386__) || defined(__386__)
x86_init_func(&vs->sse2_program);
tgsi_emit_sse2(shader->tokens, &vs->sse2_program);
((struct pipe_shader_state*)(vs->state))->executable =
x86_get_func(&vs->sse2_program);
#endif
return vs;
}
void draw_bind_vertex_shader(struct draw_context *draw,
void *vcso)
{
draw_flush(draw);
draw->vertex_shader = (struct draw_vertex_shader*)(vcso);
}
void draw_delete_vertex_shader(struct draw_context *draw,
void *vcso)
{
struct draw_vertex_shader *vs = (struct draw_vertex_shader*)(vcso);
#if defined(__i386__) || defined(__386__)
x86_release_func(&vs->sse2_program);
#endif
free(vcso);
}

View File

@@ -443,13 +443,16 @@ static void i915_set_polygon_stipple( struct pipe_context *pipe,
{ {
} }
static void * i915_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ) static void *
i915_create_shader_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
{ {
return 0; return 0;
} }
static void i915_bind_fs_state(struct pipe_context *pipe, void *fs) static void i915_bind_fs_state( struct pipe_context *pipe,
void *fs )
{ {
struct i915_context *i915 = i915_context(pipe); struct i915_context *i915 = i915_context(pipe);
@@ -458,37 +461,22 @@ static void i915_bind_fs_state(struct pipe_context *pipe, void *fs)
i915->dirty |= I915_NEW_FS; i915->dirty |= I915_NEW_FS;
} }
static void i915_delete_fs_state(struct pipe_context *pipe, void *shader)
static void i915_bind_vs_state(struct pipe_context *pipe,
void *vs)
{
struct i915_context *i915 = i915_context(pipe);
/* just pass-through to draw module */
draw_set_vertex_shader(i915->draw, (const struct pipe_shader_state *)vs);
}
static void i915_delete_shader_state(struct pipe_context *pipe,
void *shader)
{ {
/*do nothing*/ /*do nothing*/
} }
static void *
i915_create_vs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
{
struct i915_context *i915 = i915_context(pipe);
/* just pass-through to draw module */
return draw_create_vertex_shader(i915->draw, templ);
}
static void i915_bind_vs_state(struct pipe_context *pipe, void *vs)
{
struct i915_context *i915 = i915_context(pipe);
/* just pass-through to draw module */
draw_bind_vertex_shader(i915->draw, vs);
}
static void i915_delete_vs_state(struct pipe_context *pipe, void *shader)
{
struct i915_context *i915 = i915_context(pipe);
/* just pass-through to draw module */
draw_delete_vertex_shader(i915->draw, shader);
}
static void i915_set_constant_buffer(struct pipe_context *pipe, static void i915_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index, uint shader, uint index,
const struct pipe_constant_buffer *buf) const struct pipe_constant_buffer *buf)
@@ -719,12 +707,12 @@ i915_init_state_functions( struct i915_context *i915 )
i915->pipe.create_rasterizer_state = i915_create_rasterizer_state; i915->pipe.create_rasterizer_state = i915_create_rasterizer_state;
i915->pipe.bind_rasterizer_state = i915_bind_rasterizer_state; i915->pipe.bind_rasterizer_state = i915_bind_rasterizer_state;
i915->pipe.delete_rasterizer_state = i915_delete_rasterizer_state; i915->pipe.delete_rasterizer_state = i915_delete_rasterizer_state;
i915->pipe.create_fs_state = i915_create_fs_state; i915->pipe.create_fs_state = i915_create_shader_state;
i915->pipe.bind_fs_state = i915_bind_fs_state; i915->pipe.bind_fs_state = i915_bind_fs_state;
i915->pipe.delete_fs_state = i915_delete_fs_state; i915->pipe.delete_fs_state = i915_delete_shader_state;
i915->pipe.create_vs_state = i915_create_vs_state; i915->pipe.create_vs_state = i915_create_shader_state;
i915->pipe.bind_vs_state = i915_bind_vs_state; i915->pipe.bind_vs_state = i915_bind_vs_state;
i915->pipe.delete_vs_state = i915_delete_vs_state; i915->pipe.delete_vs_state = i915_delete_shader_state;
i915->pipe.set_blend_color = i915_set_blend_color; i915->pipe.set_blend_color = i915_set_blend_color;
i915->pipe.set_clip_state = i915_set_clip_state; i915->pipe.set_clip_state = i915_set_clip_state;

View File

@@ -275,12 +275,12 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys,
softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state;
softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state;
softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state;
softpipe->pipe.create_fs_state = softpipe_create_fs_state; softpipe->pipe.create_fs_state = softpipe_create_shader_state;
softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; softpipe->pipe.bind_fs_state = softpipe_bind_fs_state;
softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; softpipe->pipe.delete_fs_state = softpipe_delete_shader_state;
softpipe->pipe.create_vs_state = softpipe_create_vs_state; softpipe->pipe.create_vs_state = softpipe_create_shader_state;
softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; softpipe->pipe.bind_vs_state = softpipe_bind_vs_state;
softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; softpipe->pipe.delete_vs_state = softpipe_delete_shader_state;
softpipe->pipe.set_blend_color = softpipe_set_blend_color; softpipe->pipe.set_blend_color = softpipe_set_blend_color;
softpipe->pipe.set_clip_state = softpipe_set_clip_state; softpipe->pipe.set_clip_state = softpipe_set_clip_state;

View File

@@ -62,10 +62,6 @@ struct draw_stage;
#define SP_NEW_VS 0x2000 #define SP_NEW_VS 0x2000
#define SP_NEW_CONSTANTS 0x4000 #define SP_NEW_CONSTANTS 0x4000
struct sp_vertex_shader_state {
const struct pipe_shader_state *state;
void *draw_data;
};
struct softpipe_context { struct softpipe_context {
struct pipe_context pipe; /**< base class */ struct pipe_context pipe; /**< base class */
@@ -80,7 +76,7 @@ struct softpipe_context {
const struct pipe_depth_stencil_state *depth_stencil; const struct pipe_depth_stencil_state *depth_stencil;
const struct pipe_rasterizer_state *rasterizer; const struct pipe_rasterizer_state *rasterizer;
const struct pipe_shader_state *fs; const struct pipe_shader_state *fs;
const struct sp_vertex_shader_state *vs; const struct pipe_shader_state *vs;
struct pipe_blend_color blend_color; struct pipe_blend_color blend_color;
struct pipe_clear_color_state clear_color; struct pipe_clear_color_state clear_color;

View File

@@ -87,14 +87,12 @@ void softpipe_set_constant_buffer(struct pipe_context *,
void softpipe_set_feedback_state( struct pipe_context *, void softpipe_set_feedback_state( struct pipe_context *,
const struct pipe_feedback_state * ); const struct pipe_feedback_state * );
void *softpipe_create_fs_state(struct pipe_context *, void *
const struct pipe_shader_state *); softpipe_create_shader_state( struct pipe_context *,
void softpipe_bind_fs_state(struct pipe_context *, void *); const struct pipe_shader_state * );
void softpipe_delete_fs_state(struct pipe_context *, void *); void softpipe_bind_fs_state( struct pipe_context *, void * );
void *softpipe_create_vs_state(struct pipe_context *, void softpipe_bind_vs_state( struct pipe_context *, void * );
const struct pipe_shader_state *); void softpipe_delete_shader_state( struct pipe_context *, void * );
void softpipe_bind_vs_state(struct pipe_context *, void *);
void softpipe_delete_vs_state(struct pipe_context *, void *);
void softpipe_set_polygon_stipple( struct pipe_context *, void softpipe_set_polygon_stipple( struct pipe_context *,
const struct pipe_poly_stipple * ); const struct pipe_poly_stipple * );

View File

@@ -43,7 +43,7 @@
*/ */
static void calculate_vertex_layout( struct softpipe_context *softpipe ) static void calculate_vertex_layout( struct softpipe_context *softpipe )
{ {
const struct pipe_shader_state *vs = softpipe->vs->state; const struct pipe_shader_state *vs = softpipe->vs;
const struct pipe_shader_state *fs = softpipe->fs; const struct pipe_shader_state *fs = softpipe->fs;
const interp_mode colorInterp const interp_mode colorInterp
= softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;

View File

@@ -33,13 +33,10 @@
#include "pipe/draw/draw_context.h" #include "pipe/draw/draw_context.h"
void * softpipe_create_fs_state(struct pipe_context *pipe, void * softpipe_create_shader_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ) const struct pipe_shader_state *templ)
{ {
/* Decide whether we'll be codegenerating this shader and if so do /* we just want the pipe_shader_state template in the bind calls */
* that now.
*/
return 0; return 0;
} }
@@ -52,51 +49,25 @@ void softpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
softpipe->dirty |= SP_NEW_FS; softpipe->dirty |= SP_NEW_FS;
} }
void softpipe_delete_fs_state(struct pipe_context *pipe,
void *shader)
{
}
void * softpipe_create_vs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
struct sp_vertex_shader_state *state =
malloc(sizeof(struct sp_vertex_shader_state));
state->state = templ;
state->draw_data = draw_create_vertex_shader(softpipe->draw,
state->state);
return state;
}
void softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) void softpipe_bind_vs_state(struct pipe_context *pipe, void *vs)
{ {
struct softpipe_context *softpipe = softpipe_context(pipe); struct softpipe_context *softpipe = softpipe_context(pipe);
softpipe->vs = (const struct sp_vertex_shader_state *)vs; softpipe->vs = (struct pipe_shader_state *)vs;
draw_bind_vertex_shader(softpipe->draw, softpipe->vs->draw_data);
softpipe->dirty |= SP_NEW_VS; softpipe->dirty |= SP_NEW_VS;
draw_set_vertex_shader(softpipe->draw, (struct pipe_shader_state *)vs);
} }
void softpipe_delete_vs_state(struct pipe_context *pipe,
void *vs) void softpipe_delete_shader_state( struct pipe_context *pipe,
void *shader )
{ {
struct softpipe_context *softpipe = softpipe_context(pipe); /* do nothing */
struct sp_vertex_shader_state *state =
(struct sp_vertex_shader_state *)vs;
draw_delete_vertex_shader(softpipe->draw, state->draw_data);
free(state);
} }
void softpipe_set_constant_buffer(struct pipe_context *pipe, void softpipe_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index, uint shader, uint index,
const struct pipe_constant_buffer *buf) const struct pipe_constant_buffer *buf)

View File

@@ -86,6 +86,10 @@ static struct gl_program *st_new_program( GLcontext *ctx,
prog->serialNo = 1; prog->serialNo = 1;
#if defined(__i386__) || defined(__386__)
x86_init_func( &prog->sse2_program );
#endif
return _mesa_init_vertex_program( ctx, return _mesa_init_vertex_program( ctx,
&prog->Base, &prog->Base,
target, target,
@@ -125,6 +129,9 @@ static void st_delete_program( GLcontext *ctx,
case GL_VERTEX_PROGRAM_ARB: case GL_VERTEX_PROGRAM_ARB:
{ {
struct st_vertex_program *stvp = (struct st_vertex_program *) prog; struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
#if defined(__i386__) || defined(__386__)
x86_release_func( &stvp->sse2_program );
#endif
st_remove_vertex_program(st, stvp); st_remove_vertex_program(st, stvp);
} }
break; break;

View File

@@ -405,7 +405,7 @@ st_feedback_draw_vbo(GLcontext *ctx,
draw_set_viewport_state(draw, &st->state.viewport); draw_set_viewport_state(draw, &st->state.viewport);
draw_set_clip_state(draw, &st->state.clip); draw_set_clip_state(draw, &st->state.clip);
draw_set_rasterizer_state(draw, &st->state.rasterizer->state); draw_set_rasterizer_state(draw, &st->state.rasterizer->state);
draw_bind_vertex_shader(draw, st->state.vs->data); draw_set_vertex_shader(draw, &st->state.vs->state);
/* XXX need to set vertex info too */ /* XXX need to set vertex info too */

View File

@@ -253,6 +253,14 @@ st_translate_vertex_program(struct st_context *st,
if (TGSI_DEBUG) if (TGSI_DEBUG)
tgsi_dump( tokensOut, 0 ); tgsi_dump( tokensOut, 0 );
#if defined(__i386__) || defined(__386__)
if (stvp->sse2_program.csr == stvp->sse2_program.store)
tgsi_emit_sse2( tokensOut, &stvp->sse2_program );
if (!cso->state.executable)
((struct cso_vertex_shader*)cso)->state.executable = (void *) x86_get_func( &stvp->sse2_program );
#endif
return cso; return cso;
} }

View File

@@ -79,6 +79,10 @@ struct st_vertex_program
/** The program in TGSI format */ /** The program in TGSI format */
struct tgsi_token tokens[ST_FP_MAX_TOKENS]; struct tgsi_token tokens[ST_FP_MAX_TOKENS];
#if defined(__i386__) || defined(__386__)
struct x86_function sse2_program;
#endif
/** Pointer to the corresponding cached shader */ /** Pointer to the corresponding cached shader */
const struct cso_vertex_shader *vs; const struct cso_vertex_shader *vs;

View File

@@ -6,22 +6,22 @@
#define DISASSEM 0 #define DISASSEM 0
#define X86_TWOB 0x0f #define X86_TWOB 0x0f
static unsigned char *cptr( void (*label)() ) static GLubyte *cptr( void (*label)() )
{ {
return (unsigned char *)(unsigned long)label; return (char *)(unsigned long)label;
} }
/* Emit bytes to the instruction stream: /* Emit bytes to the instruction stream:
*/ */
static void emit_1b( struct x86_function *p, char b0 ) static void emit_1b( struct x86_function *p, GLbyte b0 )
{ {
*(char *)(p->csr++) = b0; *(GLbyte *)(p->csr++) = b0;
} }
static void emit_1i( struct x86_function *p, int i0 ) static void emit_1i( struct x86_function *p, GLint i0 )
{ {
*(int *)(p->csr) = i0; *(GLint *)(p->csr) = i0;
p->csr += 4; p->csr += 4;
} }
@@ -35,20 +35,20 @@ static void disassem( struct x86_function *p, const char *fn )
#endif #endif
} }
static void emit_1ub_fn( struct x86_function *p, unsigned char b0, const char *fn ) static void emit_1ub_fn( struct x86_function *p, GLubyte b0, const char *fn )
{ {
disassem(p, fn); disassem(p, fn);
*(p->csr++) = b0; *(p->csr++) = b0;
} }
static void emit_2ub_fn( struct x86_function *p, unsigned char b0, unsigned char b1, const char *fn ) static void emit_2ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, const char *fn )
{ {
disassem(p, fn); disassem(p, fn);
*(p->csr++) = b0; *(p->csr++) = b0;
*(p->csr++) = b1; *(p->csr++) = b1;
} }
static void emit_3ub_fn( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2, const char *fn ) static void emit_3ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, GLubyte b2, const char *fn )
{ {
disassem(p, fn); disassem(p, fn);
*(p->csr++) = b0; *(p->csr++) = b0;
@@ -69,7 +69,7 @@ static void emit_modrm( struct x86_function *p,
struct x86_reg reg, struct x86_reg reg,
struct x86_reg regmem ) struct x86_reg regmem )
{ {
unsigned char val = 0; GLubyte val = 0;
assert(reg.mod == mod_REG); assert(reg.mod == mod_REG);
@@ -104,7 +104,7 @@ static void emit_modrm( struct x86_function *p,
static void emit_modrm_noreg( struct x86_function *p, static void emit_modrm_noreg( struct x86_function *p,
unsigned op, GLuint op,
struct x86_reg regmem ) struct x86_reg regmem )
{ {
struct x86_reg dummy = x86_make_reg(file_REG32, op); struct x86_reg dummy = x86_make_reg(file_REG32, op);
@@ -117,8 +117,8 @@ static void emit_modrm_noreg( struct x86_function *p,
* the arguments presented. * the arguments presented.
*/ */
static void emit_op_modrm( struct x86_function *p, static void emit_op_modrm( struct x86_function *p,
unsigned char op_dst_is_reg, GLubyte op_dst_is_reg,
unsigned char op_dst_is_mem, GLubyte op_dst_is_mem,
struct x86_reg dst, struct x86_reg dst,
struct x86_reg src ) struct x86_reg src )
{ {
@@ -162,7 +162,7 @@ struct x86_reg x86_make_reg( enum x86_reg_file file,
} }
struct x86_reg x86_make_disp( struct x86_reg reg, struct x86_reg x86_make_disp( struct x86_reg reg,
int disp ) GLint disp )
{ {
assert(reg.file == file_REG32); assert(reg.file == file_REG32);
@@ -191,7 +191,7 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg )
return x86_make_reg( reg.file, reg.idx ); return x86_make_reg( reg.file, reg.idx );
} }
unsigned char *x86_get_label( struct x86_function *p ) GLubyte *x86_get_label( struct x86_function *p )
{ {
return p->csr; return p->csr;
} }
@@ -205,13 +205,13 @@ unsigned char *x86_get_label( struct x86_function *p )
void x86_jcc( struct x86_function *p, void x86_jcc( struct x86_function *p,
enum x86_cc cc, enum x86_cc cc,
unsigned char *label ) GLubyte *label )
{ {
int offset = label - (x86_get_label(p) + 2); GLint offset = label - (x86_get_label(p) + 2);
if (offset <= 127 && offset >= -128) { if (offset <= 127 && offset >= -128) {
emit_1ub(p, 0x70 + cc); emit_1ub(p, 0x70 + cc);
emit_1b(p, (char) offset); emit_1b(p, (GLbyte) offset);
} }
else { else {
offset = label - (x86_get_label(p) + 6); offset = label - (x86_get_label(p) + 6);
@@ -222,7 +222,7 @@ void x86_jcc( struct x86_function *p,
/* Always use a 32bit offset for forward jumps: /* Always use a 32bit offset for forward jumps:
*/ */
unsigned char *x86_jcc_forward( struct x86_function *p, GLubyte *x86_jcc_forward( struct x86_function *p,
enum x86_cc cc ) enum x86_cc cc )
{ {
emit_2ub(p, 0x0f, 0x80 + cc); emit_2ub(p, 0x0f, 0x80 + cc);
@@ -230,14 +230,14 @@ unsigned char *x86_jcc_forward( struct x86_function *p,
return x86_get_label(p); return x86_get_label(p);
} }
unsigned char *x86_jmp_forward( struct x86_function *p) GLubyte *x86_jmp_forward( struct x86_function *p)
{ {
emit_1ub(p, 0xe9); emit_1ub(p, 0xe9);
emit_1i(p, 0); emit_1i(p, 0);
return x86_get_label(p); return x86_get_label(p);
} }
unsigned char *x86_call_forward( struct x86_function *p) GLubyte *x86_call_forward( struct x86_function *p)
{ {
emit_1ub(p, 0xe8); emit_1ub(p, 0xe8);
emit_1i(p, 0); emit_1i(p, 0);
@@ -247,12 +247,12 @@ unsigned char *x86_call_forward( struct x86_function *p)
/* Fixup offset from forward jump: /* Fixup offset from forward jump:
*/ */
void x86_fixup_fwd_jump( struct x86_function *p, void x86_fixup_fwd_jump( struct x86_function *p,
unsigned char *fixup ) GLubyte *fixup )
{ {
*(int *)(fixup - 4) = x86_get_label(p) - fixup; *(int *)(fixup - 4) = x86_get_label(p) - fixup;
} }
void x86_jmp( struct x86_function *p, unsigned char *label) void x86_jmp( struct x86_function *p, GLubyte *label)
{ {
emit_1ub(p, 0xe9); emit_1ub(p, 0xe9);
emit_1i(p, label - x86_get_label(p) - 4); emit_1i(p, label - x86_get_label(p) - 4);
@@ -268,7 +268,7 @@ void x86_call( struct x86_function *p, void (*label)())
* Temporary. As I need immediate operands, and don't want to mess with the codegen, * Temporary. As I need immediate operands, and don't want to mess with the codegen,
* I load the immediate into general purpose register and use it. * I load the immediate into general purpose register and use it.
*/ */
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, GLint imm )
{ {
assert(dst.mod == mod_REG); assert(dst.mod == mod_REG);
emit_1ub(p, 0xb8 + dst.idx); emit_1ub(p, 0xb8 + dst.idx);
@@ -595,7 +595,7 @@ void sse_cvtps2pi( struct x86_function *p,
void sse_shufps( struct x86_function *p, void sse_shufps( struct x86_function *p,
struct x86_reg dest, struct x86_reg dest,
struct x86_reg arg0, struct x86_reg arg0,
unsigned char shuf) GLubyte shuf)
{ {
emit_2ub(p, X86_TWOB, 0xC6); emit_2ub(p, X86_TWOB, 0xC6);
emit_modrm(p, dest, arg0); emit_modrm(p, dest, arg0);
@@ -605,7 +605,7 @@ void sse_shufps( struct x86_function *p,
void sse_cmpps( struct x86_function *p, void sse_cmpps( struct x86_function *p,
struct x86_reg dest, struct x86_reg dest,
struct x86_reg arg0, struct x86_reg arg0,
unsigned char cc) GLubyte cc)
{ {
emit_2ub(p, X86_TWOB, 0xC2); emit_2ub(p, X86_TWOB, 0xC2);
emit_modrm(p, dest, arg0); emit_modrm(p, dest, arg0);
@@ -630,7 +630,7 @@ void sse_pmovmskb( struct x86_function *p,
void sse2_pshufd( struct x86_function *p, void sse2_pshufd( struct x86_function *p,
struct x86_reg dest, struct x86_reg dest,
struct x86_reg arg0, struct x86_reg arg0,
unsigned char shuf) GLubyte shuf)
{ {
emit_3ub(p, 0x66, X86_TWOB, 0x70); emit_3ub(p, 0x66, X86_TWOB, 0x70);
emit_modrm(p, dest, arg0); emit_modrm(p, dest, arg0);
@@ -772,11 +772,11 @@ void x87_fclex( struct x86_function *p )
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
unsigned char dst0ub0, GLubyte dst0ub0,
unsigned char dst0ub1, GLubyte dst0ub1,
unsigned char arg0ub0, GLubyte arg0ub0,
unsigned char arg0ub1, GLubyte arg0ub1,
unsigned char argmem_noreg) GLubyte argmem_noreg)
{ {
assert(dst.file == file_x87); assert(dst.file == file_x87);
@@ -1116,7 +1116,7 @@ void mmx_movq( struct x86_function *p,
* account any push/pop activity: * account any push/pop activity:
*/ */
struct x86_reg x86_fn_arg( struct x86_function *p, struct x86_reg x86_fn_arg( struct x86_function *p,
unsigned arg ) GLuint arg )
{ {
return x86_make_disp(x86_make_reg(file_REG32, reg_SP), return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + arg * 4); /* ??? */ p->stack_offset + arg * 4); /* ??? */
@@ -1128,7 +1128,7 @@ void x86_init_func( struct x86_function *p )
x86_init_func_size(p, 1024); x86_init_func_size(p, 1024);
} }
void x86_init_func_size( struct x86_function *p, unsigned code_size ) void x86_init_func_size( struct x86_function *p, GLuint code_size )
{ {
p->store = _mesa_exec_malloc(code_size); p->store = _mesa_exec_malloc(code_size);
p->csr = p->store; p->csr = p->store;

View File

@@ -4,22 +4,24 @@
#if defined(__i386__) || defined(__386__) #if defined(__i386__) || defined(__386__)
#include "glheader.h"
/* It is up to the caller to ensure that instructions issued are /* It is up to the caller to ensure that instructions issued are
* suitable for the host cpu. There are no checks made in this module * suitable for the host cpu. There are no checks made in this module
* for mmx/sse/sse2 support on the cpu. * for mmx/sse/sse2 support on the cpu.
*/ */
struct x86_reg { struct x86_reg {
unsigned file:3; GLuint file:3;
unsigned idx:3; GLuint idx:3;
unsigned mod:2; /* mod_REG if this is just a register */ GLuint mod:2; /* mod_REG if this is just a register */
int disp:24; /* only +/- 23bits of offset - should be enough... */ GLint disp:24; /* only +/- 23bits of offset - should be enough... */
}; };
struct x86_function { struct x86_function {
unsigned char *store; GLubyte *store;
unsigned char *csr; GLubyte *csr;
unsigned stack_offset; GLuint stack_offset;
int need_emms; GLint need_emms;
const char *fn; const char *fn;
}; };
@@ -79,7 +81,7 @@ enum sse_cc {
void x86_init_func( struct x86_function *p ); void x86_init_func( struct x86_function *p );
void x86_init_func_size( struct x86_function *p, unsigned code_size ); void x86_init_func_size( struct x86_function *p, GLuint code_size );
void x86_release_func( struct x86_function *p ); void x86_release_func( struct x86_function *p );
void (*x86_get_func( struct x86_function *p ))( void ); void (*x86_get_func( struct x86_function *p ))( void );
@@ -91,7 +93,7 @@ struct x86_reg x86_make_reg( enum x86_reg_file file,
enum x86_reg_name idx ); enum x86_reg_name idx );
struct x86_reg x86_make_disp( struct x86_reg reg, struct x86_reg x86_make_disp( struct x86_reg reg,
int disp ); GLint disp );
struct x86_reg x86_deref( struct x86_reg reg ); struct x86_reg x86_deref( struct x86_reg reg );
@@ -100,23 +102,23 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg );
/* Labels, jumps and fixup: /* Labels, jumps and fixup:
*/ */
unsigned char *x86_get_label( struct x86_function *p ); GLubyte *x86_get_label( struct x86_function *p );
void x86_jcc( struct x86_function *p, void x86_jcc( struct x86_function *p,
enum x86_cc cc, enum x86_cc cc,
unsigned char *label ); GLubyte *label );
unsigned char *x86_jcc_forward( struct x86_function *p, GLubyte *x86_jcc_forward( struct x86_function *p,
enum x86_cc cc ); enum x86_cc cc );
unsigned char *x86_jmp_forward( struct x86_function *p); GLubyte *x86_jmp_forward( struct x86_function *p);
unsigned char *x86_call_forward( struct x86_function *p); GLubyte *x86_call_forward( struct x86_function *p);
void x86_fixup_fwd_jump( struct x86_function *p, void x86_fixup_fwd_jump( struct x86_function *p,
unsigned char *fixup ); GLubyte *fixup );
void x86_jmp( struct x86_function *p, unsigned char *label ); void x86_jmp( struct x86_function *p, GLubyte *label );
void x86_call( struct x86_function *p, void (*label)() ); void x86_call( struct x86_function *p, void (*label)() );
@@ -124,7 +126,7 @@ void x86_call( struct x86_function *p, void (*label)() );
* Temporary. As I need immediate operands, and don't want to mess with the codegen, * Temporary. As I need immediate operands, and don't want to mess with the codegen,
* I load the immediate into general purpose register and use it. * I load the immediate into general purpose register and use it.
*/ */
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, GLint imm );
/* Macro for sse_shufps() and sse2_pshufd(): /* Macro for sse_shufps() and sse2_pshufd():
@@ -145,8 +147,7 @@ void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
unsigned char shuf );
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -156,8 +157,7 @@ void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg sr
void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, GLubyte cc );
unsigned char cc );
void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -175,8 +175,7 @@ void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
unsigned char shuf );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -248,7 +247,7 @@ void x87_fucom( struct x86_function *p, struct x86_reg arg );
* account any push/pop activity. Note - doesn't track explicit * account any push/pop activity. Note - doesn't track explicit
* manipulation of ESP by other instructions. * manipulation of ESP by other instructions.
*/ */
struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); struct x86_reg x86_fn_arg( struct x86_function *p, GLuint arg );
#endif #endif
#endif #endif