i915g: Add more optimizations

This patch adds liveness analysis to i915g and a couple
optimizations which benefit from it. One interesting
optimization turns (fake) indirect texture accesses into direct
texture accesses (the i915 supports a maximum of 4 indirect
texture accesses). Among other things this fixes a bunch of
piglit tests.
This commit is contained in:
Stéphane Marchesin
2013-09-04 12:03:10 -07:00
parent a974b915b6
commit 4e861ac4a1
3 changed files with 371 additions and 58 deletions

View File

@@ -322,6 +322,8 @@ struct i915_token_list
extern struct i915_token_list* i915_optimize(const struct tgsi_token *tokens); extern struct i915_token_list* i915_optimize(const struct tgsi_token *tokens);
extern void i915_optimize_free(struct i915_token_list* tokens); extern void i915_optimize_free(struct i915_token_list *tokens);
extern uint i915_num_coords(uint tex);
#endif #endif

View File

@@ -35,8 +35,15 @@
#include "util/u_string.h" #include "util/u_string.h"
#include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1) struct i915_optimize_context
{
int first_write[TGSI_EXEC_NUM_TEMPS];
int last_read[TGSI_EXEC_NUM_TEMPS];
};
static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1)
{ {
return (s1->Register.File == d1->Register.File && return (s1->Register.File == d1->Register.File &&
s1->Register.Indirect == d1->Register.Indirect && s1->Register.Indirect == d1->Register.Indirect &&
@@ -44,7 +51,7 @@ static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_f
s1->Register.Index == d1->Register.Index); s1->Register.Index == d1->Register.Index);
} }
static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2) static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2)
{ {
return (d1->Register.File == d2->Register.File && return (d1->Register.File == d2->Register.File &&
d1->Register.Indirect == d2->Register.Indirect && d1->Register.Indirect == d2->Register.Indirect &&
@@ -52,7 +59,7 @@ static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_
d1->Register.Index == d2->Register.Index); d1->Register.Index == d2->Register.Index);
} }
static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2) static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2)
{ {
return (d1->Register.File == d2->Register.File && return (d1->Register.File == d2->Register.File &&
d1->Register.Indirect == d2->Register.Indirect && d1->Register.Indirect == d2->Register.Indirect &&
@@ -62,16 +69,99 @@ static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_
d1->Register.Negate == d2->Register.Negate); d1->Register.Negate == d2->Register.Negate);
} }
static boolean has_destination(unsigned opcode) const static struct {
boolean is_texture;
boolean commutes;
unsigned neutral_element;
unsigned num_dst;
unsigned num_src;
} op_table [TGSI_OPCODE_LAST] = {
[ TGSI_OPCODE_ABS ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_ADD ] = { false, true, TGSI_SWIZZLE_ZERO, 1, 2 },
[ TGSI_OPCODE_CEIL ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_CMP ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_COS ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_DDX ] = { false, false, 0, 1, 0 },
[ TGSI_OPCODE_DDY ] = { false, false, 0, 1, 0 },
[ TGSI_OPCODE_DP2 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
[ TGSI_OPCODE_DP3 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
[ TGSI_OPCODE_DP4 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
[ TGSI_OPCODE_DPH ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_DST ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_END ] = { false, false, 0, 0, 0 },
[ TGSI_OPCODE_EX2 ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_FLR ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_FRC ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_KILL_IF ] = { false, false, 0, 0, 1 },
[ TGSI_OPCODE_KILL ] = { false, false, 0, 0, 0 },
[ TGSI_OPCODE_LG2 ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_LIT ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_LRP ] = { false, false, 0, 1, 3 },
[ TGSI_OPCODE_MAX ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_MAD ] = { false, false, 0, 1, 3 },
[ TGSI_OPCODE_MIN ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_MOV ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_MUL ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
[ TGSI_OPCODE_NOP ] = { false, false, 0, 0, 0 },
[ TGSI_OPCODE_POW ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_RCP ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_RET ] = { false, false, 0, 0, 0 },
[ TGSI_OPCODE_RSQ ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_SCS ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_SEQ ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_SGE ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_SGT ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_SIN ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_SLE ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_SLT ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_SNE ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_SSG ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_SUB ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_TEX ] = { true, false, 0, 1, 2 },
[ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 },
[ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 },
[ TGSI_OPCODE_XPD ] = { false, false, 0, 1, 2 },
};
static boolean op_has_dst(unsigned opcode)
{ {
return (opcode != TGSI_OPCODE_NOP && return (op_table[opcode].num_dst > 0);
opcode != TGSI_OPCODE_KILL_IF &&
opcode != TGSI_OPCODE_KILL &&
opcode != TGSI_OPCODE_END &&
opcode != TGSI_OPCODE_RET);
} }
static boolean is_unswizzled(struct i915_full_src_register* r, static int op_num_dst(unsigned opcode)
{
return op_table[opcode].num_dst;
}
static int op_num_src(unsigned opcode)
{
return op_table[opcode].num_src;
}
static boolean op_commutes(unsigned opcode)
{
return op_table[opcode].commutes;
}
static unsigned mask_for_unswizzled(int num_components)
{
unsigned mask = 0;
switch(num_components)
{
case 4:
mask |= TGSI_WRITEMASK_W;
case 3:
mask |= TGSI_WRITEMASK_Z;
case 2:
mask |= TGSI_WRITEMASK_Y;
case 1:
mask |= TGSI_WRITEMASK_X;
}
return mask;
}
static boolean is_unswizzled(struct i915_full_src_register *r,
unsigned write_mask) unsigned write_mask)
{ {
if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X) if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
@@ -85,42 +175,26 @@ static boolean is_unswizzled(struct i915_full_src_register* r,
return TRUE; return TRUE;
} }
static boolean op_commutes(unsigned opcode) static boolean op_is_texture(unsigned opcode)
{ {
switch(opcode) return op_table[opcode].is_texture;
{
case TGSI_OPCODE_ADD:
case TGSI_OPCODE_MUL:
case TGSI_OPCODE_DP2:
case TGSI_OPCODE_DP3:
case TGSI_OPCODE_DP4:
return TRUE;
}
return FALSE;
} }
static unsigned op_neutral_element(unsigned opcode) static unsigned op_neutral_element(unsigned opcode)
{ {
switch(opcode) unsigned ne = op_table[opcode].neutral_element;
{ if (!ne) {
case TGSI_OPCODE_ADD: debug_printf("No neutral element for opcode %d\n",opcode);
return TGSI_SWIZZLE_ZERO; ne = TGSI_SWIZZLE_ZERO;
case TGSI_OPCODE_MUL:
case TGSI_OPCODE_DP2:
case TGSI_OPCODE_DP3:
case TGSI_OPCODE_DP4:
return TGSI_SWIZZLE_ONE;
} }
return ne;
debug_printf("Unknown opcode %d\n",opcode);
return TGSI_SWIZZLE_ZERO;
} }
/* /*
* Sets the swizzle to the neutral element for the operation for the bits * Sets the swizzle to the neutral element for the operation for the bits
* of writemask which are set, swizzle to identity otherwise. * of writemask which are set, swizzle to identity otherwise.
*/ */
static void set_neutral_element_swizzle(struct i915_full_src_register* r, static void set_neutral_element_swizzle(struct i915_full_src_register *r,
unsigned write_mask, unsigned write_mask,
unsigned neutral) unsigned neutral)
{ {
@@ -145,7 +219,7 @@ static void set_neutral_element_swizzle(struct i915_full_src_register* r,
r->Register.SwizzleW = TGSI_SWIZZLE_W; r->Register.SwizzleW = TGSI_SWIZZLE_W;
} }
static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i) static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
{ {
o->File = i->File; o->File = i->File;
o->Indirect = i->Indirect; o->Indirect = i->Indirect;
@@ -159,7 +233,7 @@ static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_regi
o->Negate = i->Negate; o->Negate = i->Negate;
} }
static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i) static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
{ {
o->File = i->File; o->File = i->File;
o->WriteMask = i->WriteMask; o->WriteMask = i->WriteMask;
@@ -168,7 +242,7 @@ static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_regi
o->Index = i->Index; o->Index = i->Index;
} }
static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i) static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i)
{ {
memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
@@ -180,7 +254,7 @@ static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_
copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
} }
static void copy_token(union i915_full_token* o, union tgsi_full_token* i) static void copy_token(union i915_full_token *o, union tgsi_full_token *i)
{ {
if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
memcpy(o, i, sizeof(*o)); memcpy(o, i, sizeof(*o));
@@ -189,6 +263,223 @@ static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
} }
static void liveness_mark_written(struct i915_optimize_context *ctx,
struct i915_full_dst_register *dst_reg,
int pos)
{
int dst_reg_index;
if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
dst_reg_index = dst_reg->Register.Index;
assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
/* dead -> live transition */
if (ctx->first_write[dst_reg_index] != -1)
ctx->first_write[dst_reg_index] = pos;
}
}
static void liveness_mark_read(struct i915_optimize_context *ctx,
struct i915_full_src_register *src_reg,
int pos)
{
int src_reg_index;
if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
src_reg_index = src_reg->Register.Index;
assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
/* live -> dead transition */
if (ctx->last_read[src_reg_index] != -1)
ctx->last_read[src_reg_index] = pos;
}
}
static void liveness_analysis(struct i915_optimize_context *ctx,
struct i915_token_list *tokens)
{
struct i915_full_dst_register *dst_reg;
struct i915_full_src_register *src_reg;
union i915_full_token *current;
unsigned opcode;
int num_dst, num_src;
int i = 0;
for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++)
{
ctx->first_write[i] = -1;
ctx->last_read[i] = -1;
}
for(i = 0; i < tokens->NumTokens; i++)
{
current = &tokens->Tokens[i];
if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
continue;
opcode = current->FullInstruction.Instruction.Opcode;
num_dst = op_num_dst(opcode);
switch(num_dst)
{
case 1:
dst_reg = &current->FullInstruction.Dst[0];
liveness_mark_written(ctx, dst_reg, i);
case 0:
break;
default:
debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
break;
}
}
for(i = tokens->NumTokens - 1; i >= 0; i--)
{
current = &tokens->Tokens[i];
if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
continue;
opcode = current->FullInstruction.Instruction.Opcode;
num_src = op_num_src(opcode);
switch(num_src)
{
case 3:
src_reg = &current->FullInstruction.Src[2];
liveness_mark_read(ctx, src_reg, i);
case 2:
src_reg = &current->FullInstruction.Src[1];
liveness_mark_read(ctx, src_reg, i);
case 1:
src_reg = &current->FullInstruction.Src[0];
liveness_mark_read(ctx, src_reg, i);
case 0:
break;
default:
debug_printf("Op %d has %d src regs\n", opcode, num_src);
break;
}
}
}
static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from)
{
int dst_reg_index = dst_reg->Register.Index;
assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
return (from >= ctx->last_read[dst_reg_index]);
}
/* Returns a mask with the components used for a texture access instruction */
static unsigned i915_tex_mask(union i915_full_token *instr)
{
unsigned mask;
/* Get the number of coords */
mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture));
/* Add the W component if projective */
if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
mask |= TGSI_WRITEMASK_W;
return mask;
}
static boolean target_is_texture2d(uint tex)
{
switch (tex) {
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
return true;
default:
return false;
}
}
/*
* Optimize away useless indirect texture reads:
* MOV TEMP[0].xy, IN[0].xyyy
* TEX TEMP[1], TEMP[0], SAMP[0], 2D
* into:
* TEX TEMP[1], IN[0], SAMP[0], 2D
*
* note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
*/
static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
struct i915_token_list *tokens,
int index)
{
union i915_full_token *current = &tokens->Tokens[index - 1];
union i915_full_token *next = &tokens->Tokens[index];
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
op_is_texture(next->FullInstruction.Instruction.Opcode) &&
target_is_texture2d(next->FullInstruction.Texture.Texture) &&
same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
unused_from(ctx, &current->FullInstruction.Dst[0], index))
{
memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0], sizeof(struct i915_src_register));
current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
}
}
/*
* Optimize away things like:
* MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
* MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
* into:
* NOP
* MOV OUT[0].xyw, TEMP[1].xyww
*/
static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next)
{
struct i915_full_src_register *src_reg1, *src_reg2;
struct i915_full_dst_register *dst_reg1, *dst_reg2;
unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
!same_src_dst_reg(&current->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
{
src_reg1 = &current->FullInstruction.Src[0];
dst_reg1 = &current->FullInstruction.Dst[0];
src_reg2 = &next->FullInstruction.Src[0];
dst_reg2 = &next->FullInstruction.Dst[0];
/* Start with swizzles from the first mov */
swizzle_x = src_reg1->Register.SwizzleX;
swizzle_y = src_reg1->Register.SwizzleY;
swizzle_z = src_reg1->Register.SwizzleZ;
swizzle_w = src_reg1->Register.SwizzleW;
/* Pile the second mov on top */
if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
swizzle_x = src_reg2->Register.SwizzleX;
if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
swizzle_y = src_reg2->Register.SwizzleY;
if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
swizzle_z = src_reg2->Register.SwizzleZ;
if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
swizzle_w = src_reg2->Register.SwizzleW;
dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
src_reg2->Register.SwizzleX = swizzle_x;
src_reg2->Register.SwizzleY = swizzle_y;
src_reg2->Register.SwizzleZ = swizzle_z;
src_reg2->Register.SwizzleW = swizzle_w;
current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
return;
}
}
/* /*
* Optimize away things like: * Optimize away things like:
* MUL OUT[0].xyz, TEMP[1], TEMP[2] * MUL OUT[0].xyz, TEMP[1], TEMP[2]
@@ -197,7 +488,7 @@ static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
* MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
* This is useful for optimizing texenv. * This is useful for optimizing texenv.
*/ */
static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next) static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next)
{ {
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
@@ -254,13 +545,13 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio
* into: * into:
* NOP * NOP
*/ */
static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current) static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
{ {
union i915_full_token current; union i915_full_token current;
copy_token(&current , tgsi_current); copy_token(&current , tgsi_current);
if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
has_destination(current.FullInstruction.Instruction.Opcode) && op_has_dst(current.FullInstruction.Instruction.Opcode) &&
current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE && current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
current.FullInstruction.Src[0].Register.Absolute == 0 && current.FullInstruction.Src[0].Register.Absolute == 0 &&
current.FullInstruction.Src[0].Register.Negate == 0 && current.FullInstruction.Src[0].Register.Negate == 0 &&
@@ -279,16 +570,23 @@ static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current)
* into: * into:
* *** OUT[0], TEMP[1], TEMP[2] * *** OUT[0], TEMP[1], TEMP[2]
*/ */
static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next) static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
struct i915_token_list *tokens,
int index)
{ {
union i915_full_token *current = &tokens->Tokens[index - 1];
union i915_full_token *next = &tokens->Tokens[index];
// &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
has_destination(current->FullInstruction.Instruction.Opcode) && op_has_dst(current->FullInstruction.Instruction.Opcode) &&
next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE && next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
next->FullInstruction.Src[0].Register.Absolute == 0 && next->FullInstruction.Src[0].Register.Absolute == 0 &&
next->FullInstruction.Src[0].Register.Negate == 0 && next->FullInstruction.Src[0].Register.Negate == 0 &&
next->FullInstruction.Dst[0].Register.File == TGSI_FILE_OUTPUT && unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW &&
is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) && is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask && current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) ) same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
@@ -304,8 +602,11 @@ struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
{ {
struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
struct tgsi_parse_context parse; struct tgsi_parse_context parse;
struct i915_optimize_context *ctx;
int i = 0; int i = 0;
ctx = malloc(sizeof(*ctx));
out_tokens->NumTokens = 0; out_tokens->NumTokens = 0;
/* Count the tokens */ /* Count the tokens */
@@ -330,18 +631,27 @@ struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
copy_token(&out_tokens->Tokens[i] , &parse.FullToken); copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
if (i > 0) {
i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
}
i++; i++;
} }
tgsi_parse_free (&parse); tgsi_parse_free (&parse);
liveness_analysis(ctx, out_tokens);
i = 1;
while( i < out_tokens->NumTokens) {
i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
i++;
}
free(ctx);
return out_tokens; return out_tokens;
} }
void i915_optimize_free(struct i915_token_list* tokens) void i915_optimize_free(struct i915_token_list *tokens)
{ {
free(tokens->Tokens); free(tokens->Tokens);
free(tokens); free(tokens);

View File

@@ -31,6 +31,7 @@
#include "i915_reg.h" #include "i915_reg.h"
#include "i915_context.h" #include "i915_context.h"
#include "i915_fpc.h" #include "i915_fpc.h"
#include "i915_debug_private.h"
#include "pipe/p_shader_tokens.h" #include "pipe/p_shader_tokens.h"
#include "util/u_math.h" #include "util/u_math.h"
@@ -179,7 +180,7 @@ static uint get_mapping(struct i915_fragment_shader* fs, int unit)
static uint static uint
src_vector(struct i915_fp_compile *p, src_vector(struct i915_fp_compile *p,
const struct i915_full_src_register *source, const struct i915_full_src_register *source,
struct i915_fragment_shader* fs) struct i915_fragment_shader *fs)
{ {
uint index = source->Register.Index; uint index = source->Register.Index;
uint src = 0, sem_name, sem_ind; uint src = 0, sem_name, sem_ind;
@@ -381,8 +382,8 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex)
/** /**
* Return the number of coords needed to access a given TGSI_TEXTURE_* * Return the number of coords needed to access a given TGSI_TEXTURE_*
*/ */
static uint uint
texture_num_coords(struct i915_fp_compile *p, uint tex) i915_num_coords(uint tex)
{ {
switch (tex) { switch (tex) {
case TGSI_TEXTURE_SHADOW1D: case TGSI_TEXTURE_SHADOW1D:
@@ -400,7 +401,7 @@ texture_num_coords(struct i915_fp_compile *p, uint tex)
return 3; return 3;
default: default:
i915_program_error(p, "Num coords"); debug_printf("Unknown texture target for num coords");
return 2; return 2;
} }
} }
@@ -427,7 +428,7 @@ emit_tex(struct i915_fp_compile *p,
sampler, sampler,
coord, coord,
opcode, opcode,
texture_num_coords(p, texture) ); i915_num_coords(texture) );
} }
@@ -440,7 +441,7 @@ static void
emit_simple_arith(struct i915_fp_compile *p, emit_simple_arith(struct i915_fp_compile *p,
const struct i915_full_instruction *inst, const struct i915_full_instruction *inst,
uint opcode, uint numArgs, uint opcode, uint numArgs,
struct i915_fragment_shader* fs) struct i915_fragment_shader *fs)
{ {
uint arg1, arg2, arg3; uint arg1, arg2, arg3;
@@ -465,7 +466,7 @@ static void
emit_simple_arith_swap2(struct i915_fp_compile *p, emit_simple_arith_swap2(struct i915_fp_compile *p,
const struct i915_full_instruction *inst, const struct i915_full_instruction *inst,
uint opcode, uint numArgs, uint opcode, uint numArgs,
struct i915_fragment_shader* fs) struct i915_fragment_shader *fs)
{ {
struct i915_full_instruction inst2; struct i915_full_instruction inst2;
@@ -1108,7 +1109,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
static void i915_translate_token(struct i915_fp_compile *p, static void i915_translate_token(struct i915_fp_compile *p,
const union i915_full_token* token, const union i915_full_token *token,
struct i915_fragment_shader *fs) struct i915_fragment_shader *fs)
{ {
struct i915_fragment_shader *ifs = p->shader; struct i915_fragment_shader *ifs = p->shader;