draw: add aos vertex shader varient

This commit is contained in:
Keith Whitwell
2008-05-21 09:44:16 +01:00
parent ba738a3135
commit 1ba10e5ccf
6 changed files with 2247 additions and 0 deletions

View File

@@ -35,6 +35,8 @@ C_SOURCES = \
draw_vertex.c \
draw_vs.c \
draw_vs_varient.c \
draw_vs_aos.c \
draw_vs_aos_io.c \
draw_vs_exec.c \
draw_vs_llvm.c \
draw_vs_sse.c

View File

@@ -162,6 +162,16 @@ struct draw_vertex_shader *
draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ);
struct draw_vs_varient_key;
struct draw_vertex_shader;
/* Varient constructor for the AOS/SSE vertex shader path.  Takes the
 * owning shader and the varient key and returns a draw_vs_varient.
 * NOTE(review): presumably returns NULL when code generation fails --
 * confirm against the implementation in draw_vs_aos.c.
 */
struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
const struct draw_vs_varient_key *key );
/********************************************************************************
* Helpers for vs implementations that don't do their own fetch/emit varients.
* Means these can be shared between shaders.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,181 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/* Authors: Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef DRAW_VS_AOS_H
#define DRAW_VS_AOS_H
struct tgsi_token;
struct x86_function;
#include "pipe/p_state.h"
#include "rtasm/rtasm_x86sse.h"
/* Vector component indices, used as arguments to SHUF() when building
 * shufps selectors.
 */
#define X 0
#define Y 1
#define Z 2
#define W 3

/* Sizes of the per-file register arrays in struct aos_machine.
 */
#define MAX_INPUTS     PIPE_MAX_ATTRIBS
#define MAX_OUTPUTS    PIPE_MAX_ATTRIBS
#define MAX_TEMPS      PIPE_MAX_ATTRIBS /* say */
#define MAX_CONSTANTS  PIPE_MAX_ATTRIBS /* say */
#define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */

/* Number of slots in aos_machine::internal[].  The IMM_* indices
 * declared further down this header run from 0 to 4, so a value of 4
 * leaves the highest one without a slot.  Keep this strictly greater
 * than the largest IMM_* index; 8 leaves some headroom.
 * NOTE(review): assumes internal[] is indexed by the IMM_* values --
 * confirm against aos_get_internal() in draw_vs_aos.c.
 */
#define MAX_INTERNALS  8

/* Extra register file id, placed directly after the TGSI_FILE_* values.
 */
#define AOS_FILE_INTERNAL TGSI_FILE_COUNT
/* This is the temporary storage used by all the aos_sse vs varients.
 * Create one per context and reuse by passing a pointer in at
 * vs_varient creation??
 *
 * Generated code addresses members of this struct as displacements
 * (computed with Offset()) from the register held in
 * aos_compilation::machine_EDX, so the layout is shared between the C
 * code and the emitted x86 code.
 */
struct aos_machine {
/* Register storage, four floats per register.
 * NOTE(review): presumably one array per register file, with internal[]
 * holding the IMM_* values; internal[] needs a slot for every IMM_*
 * index -- TODO confirm against draw_vs_aos.c.
 */
float input [MAX_INPUTS ][4];
float output [MAX_OUTPUTS ][4];
float temp [MAX_TEMPS ][4];
float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */
float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */
float internal [MAX_INTERNALS ][4];
/* NOTE(review): presumably x87 control-word values for the two rounding
 * modes; not referenced by the fetch/emit code -- confirm in draw_vs_aos.c.
 */
unsigned fpu_round_nearest;
unsigned fpu_round_neg_inf;
/* Per-attribute fetch state.  The generated fetch code computes the
 * address of an attribute as input_ptr + input_stride * element.
 * output_offset is not read by the fetch/emit code in draw_vs_aos_io.c.
 */
struct {
const void *input_ptr;
unsigned input_stride;
unsigned output_offset;
} attrib[PIPE_MAX_ATTRIBS];
};
/* State carried through the translation of one shader varient into x86
 * code.
 */
struct aos_compilation {
/* Function that all sse_ and x86_ emit calls append instructions to. */
struct x86_function *func;
/* Varient being compiled; its base.key and base.vs are consulted for
 * attribute formats, offsets and counts.
 */
struct draw_vs_varient_aos_sse *vaos;
/* Incremented once for every input fetched and every output emitted by
 * the code in draw_vs_aos_io.c.
 */
unsigned insn_counter;
unsigned num_immediates;
/* Bookkeeping for the eight XMM registers.
 * NOTE(review): file/idx/dirty mirror the arguments of
 * aos_adopt_xmm_reg(), so presumably they record which shader register
 * each XMM register currently holds and whether it needs writing back;
 * last_used presumably holds an insn_counter value -- confirm.
 */
struct {
unsigned idx:16;
unsigned file:8;
unsigned dirty:8;
unsigned last_used;
} xmm[8];
boolean input_fetched[PIPE_MAX_ATTRIBS];
unsigned output_last_write[PIPE_MAX_ATTRIBS];
boolean have_sse2;
/* Set to 1 by the ERROR() macro when translation fails. */
boolean error;
short fpucntl;
/* these are actually known values, but putting them in a struct
 * like this is helpful to keep them in sync across the file.
 *
 * As used by the fetch/emit code: tmp_EAX receives the computed source
 * pointer, idx_EBX is used directly for linear runs and dereferenced
 * otherwise, outbuf_ECX is the base for output stores and machine_EDX
 * is the base for aos_machine fields.
 */
struct x86_reg tmp_EAX;
struct x86_reg idx_EBX; /* either start+i or &elt[i] */
struct x86_reg outbuf_ECX;
struct x86_reg machine_EDX;
struct x86_reg count_ESI; /* decrements to zero */
};
/* Obtain an XMM register for use as a scratch or destination register.
 * NOTE(review): presumably may spill another register to make room --
 * confirm in draw_vs_aos.c.
 */
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
/* Hand back the XMM register with the given index (callers pass reg.idx).
 */
void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx );
/* Associate an XMM register with shader register (file, idx).  The input
 * fetch code calls this with dirty == TRUE for freshly loaded inputs.
 */
void aos_adopt_xmm_reg( struct aos_compilation *cp,
struct x86_reg reg,
unsigned file,
unsigned idx,
unsigned dirty );
/* Return an operand for shader register (file, idx).  The result is not
 * necessarily an XMM register; callers check reg.file against file_XMM.
 */
struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
unsigned file,
unsigned idx );
/* Emit code that loads the shader's input attributes.  'linear' selects
 * whether idx_EBX is used directly as the element index or dereferenced.
 * Returns FALSE if an input format is not handled.
 */
boolean aos_fetch_inputs( struct aos_compilation *cp,
boolean linear );
/* Emit code that converts shader output registers and stores them
 * relative to outbuf_ECX.  Returns FALSE if an output format is not
 * handled.
 */
boolean aos_emit_outputs( struct aos_compilation *cp );
/* Indices of the built-in constant vectors; these are the values passed
 * as 'imm' to aos_get_internal().  The trailing comment on each line
 * gives the vector's x,y,z,w contents.
 */
#define IMM_ONES 0 /* 1, 1,1,1 */
#define IMM_NEGS 1 /* 1,-1,0,0 */
#define IMM_IDENTITY 2 /* 0, 0,0,1 */
#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
#define IMM_255 4 /* 255, 255, 255, 255 */
/* Return an operand referring to the built-in constant vector 'imm'
 * (one of the IMM_* indices above), usable as an SSE source operand.
 */
struct x86_reg aos_get_internal( struct aos_compilation *cp,
unsigned imm );
/* Report a translation failure: print the enclosing function's name and
 * 'msg', flag the failure in cp->error and trip an assertion.  Callers
 * are still expected to return a failure code afterwards, since assert()
 * may be compiled out.
 *
 * Both arguments are parenthesized so that arbitrary expressions (for
 * example a conditional or an address-of) expand correctly.
 */
#define ERROR(cp, msg)                                                    \
do {                                                                      \
   debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, (msg)); \
   (cp)->error = 1;                                                       \
   assert(0);                                                             \
} while (0)
/* A vertex shader varient implemented with run-time generated x86/SSE
 * code.  'base' is the first member, so the generic draw_vs_varient
 * fields (key, vs, ...) are reached through it.
 */
struct draw_vs_varient_aos_sse {
struct draw_vs_varient base;
struct draw_context *draw;
#if 0
struct {
const void *ptr;
unsigned stride;
} attrib[PIPE_MAX_ATTRIBS];
#endif
struct aos_machine *machine; /* XXX: temporarily unshared */
/* NOTE(review): presumably the entry points into the generated code for
 * linear and element-indexed runs, backed by the two x86_function
 * objects below -- confirm in draw_vs_aos.c.
 */
vsv_run_linear_func gen_run_linear;
vsv_run_elts_func gen_run_elts;
struct x86_function func[2];
};
#endif

View File

@@ -0,0 +1,314 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/util/tgsi_parse.h"
#include "tgsi/util/tgsi_util.h"
#include "tgsi/exec/tgsi_exec.h"
#include "draw_vs.h"
#include "draw_vs_aos.h"
#include "rtasm/rtasm_x86sse.h"
#ifdef PIPE_ARCH_X86
/* Note - don't yet have to worry about interacting with the code in
* draw_vs_aos.c as there is no intermingling of generated code...
* That may have to change, we'll see.
*/
/* Emit an unaligned load of four consecutive floats at src_ptr into
 * data.
 */
static void emit_load_R32G32B32A32( struct aos_compilation *cp,
                                    struct x86_reg data,
                                    struct x86_reg src_ptr )
{
   struct x86_function *func = cp->func;

   sse_movups(func, data, src_ptr);
}
static void emit_load_R32G32B32( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
sse_shufps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
sse_movlps(cp->func, data, src_ptr);
}
static void emit_load_R32G32( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
sse_movups(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) );
sse_movlps(cp->func, data, src_ptr);
}
static void emit_load_R32( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, src_ptr);
sse_orps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) );
}
/* Emit a load of four packed unsigned bytes at src_ptr and convert them
 * to four floats scaled by IMM_INV_255.
 *
 * The bytes are widened by interleaving twice with IMM_IDENTITY, whose
 * low components are documented as zero, before the integer-to-float
 * conversion.
 */
static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
                                      struct x86_reg data,
                                      struct x86_reg src_ptr )
{
   struct x86_function *func = cp->func;
   unsigned pass;

   sse_movss(func, data, src_ptr);

   /* Two byte interleaves: 8 bits -> 16 bits -> 32 bits per component. */
   for (pass = 0; pass < 2; pass++)
      sse2_punpcklbw(func, data, aos_get_internal( cp, IMM_IDENTITY ));

   sse2_cvtdq2ps(func, data, data);
   sse_mulps(func, data, aos_get_internal(cp, IMM_INV_255));
}
/* Emit code that leaves the address of attribute 'a' for element 'elt'
 * in 'src':
 *
 *    src = machine->attrib[a].input_stride * elt
 *          + machine->attrib[a].input_ptr
 *
 * 'machine' is the register holding the aos_machine pointer.
 */
static void get_src_ptr( struct x86_function *func,
                         struct x86_reg src,
                         struct x86_reg machine,
                         struct x86_reg elt,
                         unsigned a )
{
   /* src = stride */
   x86_mov(func, src,
           x86_make_disp(machine,
                         Offset(struct aos_machine, attrib[a].input_stride)));

   /* src *= elt */
   x86_imul(func, src, elt);

   /* src += base pointer */
   x86_add(func, src,
           x86_make_disp(machine,
                         Offset(struct aos_machine, attrib[a].input_ptr)));
}
/* Emit a shufps of 'src' into 'dest' using the given SHUF() selector.
 * Extended swizzles are not supported (maybe later).
 */
static void emit_swizzle( struct aos_compilation *cp,
                          struct x86_reg dest,
                          struct x86_reg src,
                          unsigned shuffle )
{
   struct x86_function *func = cp->func;

   sse_shufps(func, dest, src, shuffle);
}
/* Emit code that fetches input attribute 'idx' into a freshly obtained
 * XMM register and registers it as TGSI_FILE_INPUT[idx] (marked dirty).
 *
 * For linear runs the element index is idx_EBX itself; otherwise
 * idx_EBX is dereferenced to obtain it.
 *
 * Returns FALSE, after flagging the error via ERROR(), when the
 * attribute's input format is not handled.
 */
static boolean load_input( struct aos_compilation *cp,
                           unsigned idx,
                           boolean linear )
{
   const unsigned format = cp->vaos->base.key.element[idx].in.format;
   struct x86_reg addr = cp->tmp_EAX;
   struct x86_reg dataXMM = aos_get_xmm_reg(cp);
   struct x86_reg elt;
   struct x86_reg src;

   if (linear)
      elt = cp->idx_EBX;
   else
      elt = x86_deref(cp->idx_EBX);

   /* Compute the attribute's address into addr, then read through it. */
   get_src_ptr(cp->func, addr, cp->machine_EDX, elt, idx);
   src = x86_deref(addr);

   aos_adopt_xmm_reg( cp, dataXMM, TGSI_FILE_INPUT, idx, TRUE );

   switch (format) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_load_R32(cp, dataXMM, src);
      break;

   case PIPE_FORMAT_R32G32_FLOAT:
      emit_load_R32G32(cp, dataXMM, src);
      break;

   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_load_R32G32B32(cp, dataXMM, src);
      break;

   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_load_R32G32B32A32(cp, dataXMM, src);
      break;

   case PIPE_FORMAT_B8G8R8A8_UNORM:
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);

      /* The BGRA layout additionally exchanges the X and Z lanes. */
      if (format == PIPE_FORMAT_B8G8R8A8_UNORM)
         emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      break;

   default:
      ERROR(cp, "unhandled input format");
      return FALSE;
   }

   return TRUE;
}
/* Emit fetch code for every shader input (info.num_inputs of them), in
 * index order.  Bumps insn_counter and prints a newline to the debug
 * output after each one.  Stops and returns FALSE as soon as one input
 * cannot be loaded; returns TRUE otherwise.
 */
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
{
   unsigned attr = 0;

   while (attr < cp->vaos->base.vs->info.num_inputs) {
      if (!load_input( cp, attr, linear ))
         return FALSE;

      cp->insn_counter++;
      debug_printf("\n");
      attr++;
   }

   return TRUE;
}
/* Emit an unaligned store of all four floats of dataXMM to dst_ptr.
 */
static void emit_store_R32G32B32A32( struct aos_compilation *cp,
                                     struct x86_reg dst_ptr,
                                     struct x86_reg dataXMM )
{
   struct x86_function *func = cp->func;

   sse_movups(func, dst_ptr, dataXMM);
}
static void emit_store_R32G32B32( struct aos_compilation *cp,
struct x86_reg dst_ptr,
struct x86_reg dataXMM )
{
sse_movlps(cp->func, dst_ptr, dataXMM);
sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
}
/* Emit a store of the low two floats of dataXMM to dst_ptr.
 */
static void emit_store_R32G32( struct aos_compilation *cp,
                               struct x86_reg dst_ptr,
                               struct x86_reg dataXMM )
{
   struct x86_function *func = cp->func;

   sse_movlps(func, dst_ptr, dataXMM);
}
/* Emit a store of the lowest float of dataXMM to dst_ptr.
 */
static void emit_store_R32( struct aos_compilation *cp,
                            struct x86_reg dst_ptr,
                            struct x86_reg dataXMM )
{
   struct x86_function *func = cp->func;

   sse_movss(func, dst_ptr, dataXMM);
}
static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
struct x86_reg dst_ptr,
struct x86_reg dataXMM )
{
sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
sse2_packssdw(cp->func, dataXMM, dataXMM);
sse2_packuswb(cp->func, dataXMM, dataXMM);
sse_movss(cp->func, dst_ptr, dataXMM);
}
/* Emit code that stores dataXMM to 'ptr' in the requested vertex
 * format.  Some formats emit code that modifies dataXMM.
 *
 * Returns FALSE, after flagging the error via ERROR(), when the format
 * is not handled.
 */
static boolean emit_output( struct aos_compilation *cp,
                            struct x86_reg ptr,
                            struct x86_reg dataXMM,
                            unsigned format )
{
   switch (format) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_store_R32(cp, ptr, dataXMM);
      return TRUE;

   case PIPE_FORMAT_R32G32_FLOAT:
      emit_store_R32G32(cp, ptr, dataXMM);
      return TRUE;

   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_store_R32G32B32(cp, ptr, dataXMM);
      return TRUE;

   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_store_R32G32B32A32(cp, ptr, dataXMM);
      return TRUE;

   case PIPE_FORMAT_B8G8R8A8_UNORM:
      /* Exchange the X and Z lanes, then store as for the RGBA layout. */
      emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      /* fallthrough */
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
      return TRUE;

   default:
      ERROR(cp, "unhandled output format");
      return FALSE;
   }
}
/* Emit store code for every shader output.
 *
 * For output i the value of TGSI_FILE_OUTPUT[i] is obtained (copied into
 * a scratch XMM register first if it is not already in one), converted
 * to key.element[i].out.format and stored at
 * outbuf_ECX + key.element[i].out.offset.  The XMM register is released
 * afterwards, insn_counter is bumped and a newline is printed to the
 * debug output.
 *
 * The loop is bounded by the shader's output count: iterating over the
 * input count would drop outputs when there are fewer inputs than
 * outputs and emit stores of unwritten registers when there are more.
 *
 * Returns FALSE as soon as an output format is not handled, TRUE
 * otherwise.
 */
boolean aos_emit_outputs( struct aos_compilation *cp )
{
   unsigned i;

   for (i = 0; i < cp->vaos->base.vs->info.num_outputs; i++) {
      unsigned format = cp->vaos->base.key.element[i].out.format;
      unsigned offset = cp->vaos->base.key.element[i].out.offset;

      struct x86_reg data = aos_get_shader_reg( cp,
                                                TGSI_FILE_OUTPUT,
                                                i );

      /* The store helpers operate on XMM registers only. */
      if (data.file != file_XMM) {
         struct x86_reg tmp = aos_get_xmm_reg( cp );
         sse_movups(cp->func, tmp, data);
         data = tmp;
      }

      if (!emit_output( cp,
                        x86_make_disp( cp->outbuf_ECX, offset ),
                        data,
                        format ))
         return FALSE;

      aos_release_xmm_reg( cp, data.idx );

      cp->insn_counter++;
      debug_printf("\n");
   }

   return TRUE;
}
#endif

View File

@@ -157,6 +157,7 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.draw = draw;
vs->base.create_varient = draw_vs_varient_generic;
// vs->base.create_varient = draw_vs_varient_aos_sse;
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;