draw: add aos vertex shader varient
This commit is contained in:
@@ -35,6 +35,8 @@ C_SOURCES = \
|
||||
draw_vertex.c \
|
||||
draw_vs.c \
|
||||
draw_vs_varient.c \
|
||||
draw_vs_aos.c \
|
||||
draw_vs_aos_io.c \
|
||||
draw_vs_exec.c \
|
||||
draw_vs_llvm.c \
|
||||
draw_vs_sse.c
|
||||
|
@@ -162,6 +162,16 @@ struct draw_vertex_shader *
|
||||
draw_create_vs_llvm(struct draw_context *draw,
|
||||
const struct pipe_shader_state *templ);
|
||||
|
||||
|
||||
|
||||
struct draw_vs_varient_key;
|
||||
struct draw_vertex_shader;
|
||||
|
||||
struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
|
||||
const struct draw_vs_varient_key *key );
|
||||
|
||||
|
||||
|
||||
/********************************************************************************
|
||||
* Helpers for vs implementations that don't do their own fetch/emit varients.
|
||||
* Means these can be shared between shaders.
|
||||
|
1739
src/gallium/auxiliary/draw/draw_vs_aos.c
Normal file
1739
src/gallium/auxiliary/draw/draw_vs_aos.c
Normal file
File diff suppressed because it is too large
Load Diff
181
src/gallium/auxiliary/draw/draw_vs_aos.h
Normal file
181
src/gallium/auxiliary/draw/draw_vs_aos.h
Normal file
@@ -0,0 +1,181 @@
|
||||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/* Authors: Keith Whitwell <keith@tungstengraphics.com>
|
||||
*/
|
||||
|
||||
#ifndef DRAW_VS_AOS_H
|
||||
#define DRAW_VS_AOS_H
|
||||
|
||||
|
||||
struct tgsi_token;
|
||||
struct x86_function;
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "rtasm/rtasm_x86sse.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define X 0
|
||||
#define Y 1
|
||||
#define Z 2
|
||||
#define W 3
|
||||
|
||||
#define MAX_INPUTS PIPE_MAX_ATTRIBS
|
||||
#define MAX_OUTPUTS PIPE_MAX_ATTRIBS
|
||||
#define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */
|
||||
#define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */
|
||||
#define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */
|
||||
#define MAX_INTERNALS 4
|
||||
|
||||
#define AOS_FILE_INTERNAL TGSI_FILE_COUNT
|
||||
|
||||
/* This is the temporary storage used by all the aos_sse vs varients.
|
||||
* Create one per context and reuse by passing a pointer in at
|
||||
* vs_varient creation??
|
||||
*/
|
||||
struct aos_machine {
|
||||
float input [MAX_INPUTS ][4];
|
||||
float output [MAX_OUTPUTS ][4];
|
||||
float temp [MAX_TEMPS ][4];
|
||||
float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */
|
||||
float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */
|
||||
float internal [MAX_INTERNALS ][4];
|
||||
|
||||
unsigned fpu_round_nearest;
|
||||
unsigned fpu_round_neg_inf;
|
||||
|
||||
struct {
|
||||
const void *input_ptr;
|
||||
unsigned input_stride;
|
||||
|
||||
unsigned output_offset;
|
||||
} attrib[PIPE_MAX_ATTRIBS];
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
struct aos_compilation {
|
||||
struct x86_function *func;
|
||||
struct draw_vs_varient_aos_sse *vaos;
|
||||
|
||||
unsigned insn_counter;
|
||||
unsigned num_immediates;
|
||||
|
||||
struct {
|
||||
unsigned idx:16;
|
||||
unsigned file:8;
|
||||
unsigned dirty:8;
|
||||
unsigned last_used;
|
||||
} xmm[8];
|
||||
|
||||
|
||||
boolean input_fetched[PIPE_MAX_ATTRIBS];
|
||||
unsigned output_last_write[PIPE_MAX_ATTRIBS];
|
||||
|
||||
boolean have_sse2;
|
||||
boolean error;
|
||||
short fpucntl;
|
||||
|
||||
/* these are actually known values, but putting them in a struct
|
||||
* like this is helpful to keep them in sync across the file.
|
||||
*/
|
||||
struct x86_reg tmp_EAX;
|
||||
struct x86_reg idx_EBX; /* either start+i or &elt[i] */
|
||||
struct x86_reg outbuf_ECX;
|
||||
struct x86_reg machine_EDX;
|
||||
struct x86_reg count_ESI; /* decrements to zero */
|
||||
};
|
||||
|
||||
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
|
||||
void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx );
|
||||
|
||||
void aos_adopt_xmm_reg( struct aos_compilation *cp,
|
||||
struct x86_reg reg,
|
||||
unsigned file,
|
||||
unsigned idx,
|
||||
unsigned dirty );
|
||||
|
||||
struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
|
||||
unsigned file,
|
||||
unsigned idx );
|
||||
|
||||
boolean aos_fetch_inputs( struct aos_compilation *cp,
|
||||
boolean linear );
|
||||
|
||||
boolean aos_emit_outputs( struct aos_compilation *cp );
|
||||
|
||||
|
||||
#define IMM_ONES 0 /* 1, 1,1,1 */
|
||||
#define IMM_NEGS 1 /* 1,-1,0,0 */
|
||||
#define IMM_IDENTITY 2 /* 0, 0,0,1 */
|
||||
#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
|
||||
#define IMM_255 4 /* 255, 255, 255, 255 */
|
||||
|
||||
struct x86_reg aos_get_internal( struct aos_compilation *cp,
|
||||
unsigned imm );
|
||||
|
||||
|
||||
/* Report a failed x86 translation: print the message together with the
 * name of the calling function, mark the compilation as failed and
 * assert.
 *
 * Both arguments are parenthesised in the expansion so that any
 * expression can be passed safely -- e.g. ERROR(&comp, ...) must expand
 * to (&comp)->error rather than &comp->error.
 */
#define ERROR(cp, msg)                                                    \
do {                                                                      \
   debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, (msg)); \
   (cp)->error = 1;                                                       \
   assert(0);                                                             \
} while (0)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
struct draw_vs_varient_aos_sse {
|
||||
struct draw_vs_varient base;
|
||||
struct draw_context *draw;
|
||||
|
||||
#if 0
|
||||
struct {
|
||||
const void *ptr;
|
||||
unsigned stride;
|
||||
} attrib[PIPE_MAX_ATTRIBS];
|
||||
#endif
|
||||
|
||||
struct aos_machine *machine; /* XXX: temporarily unshared */
|
||||
|
||||
vsv_run_linear_func gen_run_linear;
|
||||
vsv_run_elts_func gen_run_elts;
|
||||
|
||||
|
||||
struct x86_function func[2];
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
314
src/gallium/auxiliary/draw/draw_vs_aos_io.c
Normal file
314
src/gallium/auxiliary/draw/draw_vs_aos_io.c
Normal file
@@ -0,0 +1,314 @@
|
||||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
#include "pipe/p_util.h"
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
#include "tgsi/util/tgsi_parse.h"
|
||||
#include "tgsi/util/tgsi_util.h"
|
||||
#include "tgsi/exec/tgsi_exec.h"
|
||||
#include "draw_vs.h"
|
||||
#include "draw_vs_aos.h"
|
||||
|
||||
#include "rtasm/rtasm_x86sse.h"
|
||||
|
||||
#ifdef PIPE_ARCH_X86
|
||||
|
||||
/* Note - don't yet have to worry about interacting with the code in
|
||||
* draw_vs_aos.c as there is no intermingling of generated code...
|
||||
* That may have to change, we'll see.
|
||||
*/
|
||||
static void emit_load_R32G32B32A32( struct aos_compilation *cp,
|
||||
struct x86_reg data,
|
||||
struct x86_reg src_ptr )
|
||||
{
|
||||
sse_movups(cp->func, data, src_ptr);
|
||||
}
|
||||
|
||||
static void emit_load_R32G32B32( struct aos_compilation *cp,
|
||||
struct x86_reg data,
|
||||
struct x86_reg src_ptr )
|
||||
{
|
||||
sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
|
||||
sse_shufps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
|
||||
sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
|
||||
sse_movlps(cp->func, data, src_ptr);
|
||||
}
|
||||
|
||||
static void emit_load_R32G32( struct aos_compilation *cp,
|
||||
struct x86_reg data,
|
||||
struct x86_reg src_ptr )
|
||||
{
|
||||
sse_movups(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) );
|
||||
sse_movlps(cp->func, data, src_ptr);
|
||||
}
|
||||
|
||||
|
||||
static void emit_load_R32( struct aos_compilation *cp,
|
||||
struct x86_reg data,
|
||||
struct x86_reg src_ptr )
|
||||
{
|
||||
sse_movss(cp->func, data, src_ptr);
|
||||
sse_orps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) );
|
||||
}
|
||||
|
||||
|
||||
static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
|
||||
struct x86_reg data,
|
||||
struct x86_reg src_ptr )
|
||||
{
|
||||
sse_movss(cp->func, data, src_ptr);
|
||||
sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ));
|
||||
sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ));
|
||||
sse2_cvtdq2ps(cp->func, data, data);
|
||||
sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void get_src_ptr( struct x86_function *func,
|
||||
struct x86_reg src,
|
||||
struct x86_reg machine,
|
||||
struct x86_reg elt,
|
||||
unsigned a )
|
||||
{
|
||||
struct x86_reg input_ptr =
|
||||
x86_make_disp(machine,
|
||||
Offset(struct aos_machine, attrib[a].input_ptr));
|
||||
|
||||
struct x86_reg input_stride =
|
||||
x86_make_disp(machine,
|
||||
Offset(struct aos_machine, attrib[a].input_stride));
|
||||
|
||||
/* Calculate pointer to current attrib:
|
||||
*/
|
||||
x86_mov(func, src, input_stride);
|
||||
x86_imul(func, src, elt);
|
||||
x86_add(func, src, input_ptr);
|
||||
}
|
||||
|
||||
|
||||
/* Extended swizzles? Maybe later.
|
||||
*/
|
||||
static void emit_swizzle( struct aos_compilation *cp,
|
||||
struct x86_reg dest,
|
||||
struct x86_reg src,
|
||||
unsigned shuffle )
|
||||
{
|
||||
sse_shufps(cp->func, dest, src, shuffle);
|
||||
}
|
||||
|
||||
|
||||
/* Emit the fetch of input attribute 'idx' into a freshly allocated xmm
 * register, which is then registered as holding (TGSI_FILE_INPUT, idx).
 *
 * When 'linear' is TRUE idx_EBX is used directly as the vertex index;
 * otherwise it is dereferenced to obtain the index.
 *
 * Returns FALSE (after raising ERROR) if the input format taken from
 * the varient key is not supported, TRUE otherwise.
 */
static boolean load_input( struct aos_compilation *cp,
                           unsigned idx,
                           boolean linear )
{
   const unsigned fmt = cp->vaos->base.key.element[idx].in.format;
   struct x86_reg addr = cp->tmp_EAX;
   struct x86_reg xmm = aos_get_xmm_reg(cp);
   struct x86_reg elt;
   boolean ok = TRUE;

   if (linear)
      elt = cp->idx_EBX;
   else
      elt = x86_deref(cp->idx_EBX);

   /* Compute the attribute's address into EAX, then use it as a memory
    * operand for the loads below.
    */
   get_src_ptr(cp->func, addr, cp->machine_EDX, elt, idx);
   addr = x86_deref(addr);

   aos_adopt_xmm_reg( cp, xmm, TGSI_FILE_INPUT, idx, TRUE );

   switch (fmt) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_load_R32(cp, xmm, addr);
      break;

   case PIPE_FORMAT_R32G32_FLOAT:
      emit_load_R32G32(cp, xmm, addr);
      break;

   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_load_R32G32B32(cp, xmm, addr);
      break;

   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_load_R32G32B32A32(cp, xmm, addr);
      break;

   case PIPE_FORMAT_B8G8R8A8_UNORM:
      /* Same as RGBA8, followed by a swizzle of the loaded value. */
      emit_load_R8G8B8A8_UNORM(cp, xmm, addr);
      emit_swizzle(cp, xmm, xmm, SHUF(Z,Y,X,W));
      break;

   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_load_R8G8B8A8_UNORM(cp, xmm, addr);
      break;

   default:
      ERROR(cp, "unhandled input format");
      ok = FALSE;
      break;
   }

   return ok;
}
|
||||
|
||||
|
||||
/* Emit fetch code for every input of the shader, in order.
 *
 * 'linear' is passed through to load_input().  Returns FALSE as soon as
 * one input cannot be loaded, TRUE once all of them have been emitted.
 */
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
{
   unsigned attr = 0;

   while (attr < cp->vaos->base.vs->info.num_inputs) {
      if (!load_input( cp, attr, linear ))
         return FALSE;

      /* Each fetch counts as one instruction. */
      cp->insn_counter++;
      debug_printf("\n");

      attr++;
   }

   return TRUE;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static void emit_store_R32G32B32A32( struct aos_compilation *cp,
|
||||
struct x86_reg dst_ptr,
|
||||
struct x86_reg dataXMM )
|
||||
{
|
||||
sse_movups(cp->func, dst_ptr, dataXMM);
|
||||
}
|
||||
|
||||
static void emit_store_R32G32B32( struct aos_compilation *cp,
|
||||
struct x86_reg dst_ptr,
|
||||
struct x86_reg dataXMM )
|
||||
{
|
||||
sse_movlps(cp->func, dst_ptr, dataXMM);
|
||||
sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
|
||||
sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
|
||||
}
|
||||
|
||||
static void emit_store_R32G32( struct aos_compilation *cp,
|
||||
struct x86_reg dst_ptr,
|
||||
struct x86_reg dataXMM )
|
||||
{
|
||||
sse_movlps(cp->func, dst_ptr, dataXMM);
|
||||
}
|
||||
|
||||
static void emit_store_R32( struct aos_compilation *cp,
|
||||
struct x86_reg dst_ptr,
|
||||
struct x86_reg dataXMM )
|
||||
{
|
||||
sse_movss(cp->func, dst_ptr, dataXMM);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
|
||||
struct x86_reg dst_ptr,
|
||||
struct x86_reg dataXMM )
|
||||
{
|
||||
sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
|
||||
sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
|
||||
sse2_packssdw(cp->func, dataXMM, dataXMM);
|
||||
sse2_packuswb(cp->func, dataXMM, dataXMM);
|
||||
sse_movss(cp->func, dst_ptr, dataXMM);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static boolean emit_output( struct aos_compilation *cp,
|
||||
struct x86_reg ptr,
|
||||
struct x86_reg dataXMM,
|
||||
unsigned format )
|
||||
{
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_R32_FLOAT:
|
||||
emit_store_R32(cp, ptr, dataXMM);
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32_FLOAT:
|
||||
emit_store_R32G32(cp, ptr, dataXMM);
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32B32_FLOAT:
|
||||
emit_store_R32G32B32(cp, ptr, dataXMM);
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32B32A32_FLOAT:
|
||||
emit_store_R32G32B32A32(cp, ptr, dataXMM);
|
||||
break;
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
|
||||
emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
|
||||
break;
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
|
||||
break;
|
||||
default:
|
||||
ERROR(cp, "unhandled output format");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
boolean aos_emit_outputs( struct aos_compilation *cp )
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < cp->vaos->base.vs->info.num_inputs; i++) {
|
||||
unsigned format = cp->vaos->base.key.element[i].out.format;
|
||||
unsigned offset = cp->vaos->base.key.element[i].out.offset;
|
||||
|
||||
struct x86_reg data = aos_get_shader_reg( cp,
|
||||
TGSI_FILE_OUTPUT,
|
||||
i );
|
||||
|
||||
if (data.file != file_XMM) {
|
||||
struct x86_reg tmp = aos_get_xmm_reg( cp );
|
||||
sse_movups(cp->func, tmp, data);
|
||||
data = tmp;
|
||||
}
|
||||
|
||||
if (!emit_output( cp,
|
||||
x86_make_disp( cp->outbuf_ECX, offset ),
|
||||
data,
|
||||
format ))
|
||||
return FALSE;
|
||||
|
||||
aos_release_xmm_reg( cp, data.idx );
|
||||
|
||||
cp->insn_counter++;
|
||||
debug_printf("\n");
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#endif
|
@@ -157,6 +157,7 @@ draw_create_vs_sse(struct draw_context *draw,
|
||||
|
||||
vs->base.draw = draw;
|
||||
vs->base.create_varient = draw_vs_varient_generic;
|
||||
// vs->base.create_varient = draw_vs_varient_aos_sse;
|
||||
vs->base.prepare = vs_sse_prepare;
|
||||
vs->base.run_linear = vs_sse_run_linear;
|
||||
vs->base.delete = vs_sse_delete;
|
||||
|
Reference in New Issue
Block a user