
It's a remnant of some old NV extension. Unused. I also have a patch that removes predicates if anyone is interested. Reviewed-by: Roland Scheidegger <sroland@vmware.com>
661 lines
25 KiB
C
661 lines
25 KiB
C
/**************************************************************************
 *
 * Copyright 2011 The Chromium OS authors.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
|
|
|
|
#include "i915_reg.h"
|
|
#include "i915_context.h"
|
|
#include "i915_fpc.h"
|
|
|
|
#include "pipe/p_shader_tokens.h"
|
|
#include "util/u_math.h"
|
|
#include "util/u_memory.h"
|
|
#include "util/u_string.h"
|
|
#include "tgsi/tgsi_parse.h"
|
|
#include "tgsi/tgsi_dump.h"
|
|
#include "tgsi/tgsi_exec.h"
|
|
|
|
struct i915_optimize_context
|
|
{
|
|
int first_write[TGSI_EXEC_NUM_TEMPS];
|
|
int last_read[TGSI_EXEC_NUM_TEMPS];
|
|
};
|
|
|
|
static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1)
|
|
{
|
|
return (s1->Register.File == d1->Register.File &&
|
|
s1->Register.Indirect == d1->Register.Indirect &&
|
|
s1->Register.Dimension == d1->Register.Dimension &&
|
|
s1->Register.Index == d1->Register.Index);
|
|
}
|
|
|
|
static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2)
|
|
{
|
|
return (d1->Register.File == d2->Register.File &&
|
|
d1->Register.Indirect == d2->Register.Indirect &&
|
|
d1->Register.Dimension == d2->Register.Dimension &&
|
|
d1->Register.Index == d2->Register.Index);
|
|
}
|
|
|
|
static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2)
|
|
{
|
|
return (d1->Register.File == d2->Register.File &&
|
|
d1->Register.Indirect == d2->Register.Indirect &&
|
|
d1->Register.Dimension == d2->Register.Dimension &&
|
|
d1->Register.Index == d2->Register.Index &&
|
|
d1->Register.Absolute == d2->Register.Absolute &&
|
|
d1->Register.Negate == d2->Register.Negate);
|
|
}
|
|
|
|
const static struct {
|
|
boolean is_texture;
|
|
boolean commutes;
|
|
unsigned neutral_element;
|
|
unsigned num_dst;
|
|
unsigned num_src;
|
|
} op_table [TGSI_OPCODE_LAST] = {
|
|
[ TGSI_OPCODE_ABS ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_ADD ] = { false, true, TGSI_SWIZZLE_ZERO, 1, 2 },
|
|
[ TGSI_OPCODE_CEIL ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_CMP ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_COS ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_DDX ] = { false, false, 0, 1, 0 },
|
|
[ TGSI_OPCODE_DDY ] = { false, false, 0, 1, 0 },
|
|
[ TGSI_OPCODE_DP2 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
|
|
[ TGSI_OPCODE_DP3 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
|
|
[ TGSI_OPCODE_DP4 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
|
|
[ TGSI_OPCODE_DPH ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_DST ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_END ] = { false, false, 0, 0, 0 },
|
|
[ TGSI_OPCODE_EX2 ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_FLR ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_FRC ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_KILL_IF ] = { false, false, 0, 0, 1 },
|
|
[ TGSI_OPCODE_KILL ] = { false, false, 0, 0, 0 },
|
|
[ TGSI_OPCODE_LG2 ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_LIT ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_LRP ] = { false, false, 0, 1, 3 },
|
|
[ TGSI_OPCODE_MAX ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_MAD ] = { false, false, 0, 1, 3 },
|
|
[ TGSI_OPCODE_MIN ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_MOV ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_MUL ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
|
|
[ TGSI_OPCODE_NOP ] = { false, false, 0, 0, 0 },
|
|
[ TGSI_OPCODE_POW ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_RCP ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_RET ] = { false, false, 0, 0, 0 },
|
|
[ TGSI_OPCODE_RSQ ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_SCS ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_SEQ ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_SGE ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_SGT ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_SIN ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_SLE ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_SLT ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_SNE ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_SSG ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_SUB ] = { false, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_TEX ] = { true, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 },
|
|
[ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 },
|
|
[ TGSI_OPCODE_XPD ] = { false, false, 0, 1, 2 },
|
|
};
|
|
|
|
static boolean op_has_dst(unsigned opcode)
|
|
{
|
|
return (op_table[opcode].num_dst > 0);
|
|
}
|
|
|
|
static int op_num_dst(unsigned opcode)
|
|
{
|
|
return op_table[opcode].num_dst;
|
|
}
|
|
|
|
static int op_num_src(unsigned opcode)
|
|
{
|
|
return op_table[opcode].num_src;
|
|
}
|
|
|
|
static boolean op_commutes(unsigned opcode)
|
|
{
|
|
return op_table[opcode].commutes;
|
|
}
|
|
|
|
static unsigned mask_for_unswizzled(int num_components)
|
|
{
|
|
unsigned mask = 0;
|
|
switch(num_components)
|
|
{
|
|
case 4:
|
|
mask |= TGSI_WRITEMASK_W;
|
|
case 3:
|
|
mask |= TGSI_WRITEMASK_Z;
|
|
case 2:
|
|
mask |= TGSI_WRITEMASK_Y;
|
|
case 1:
|
|
mask |= TGSI_WRITEMASK_X;
|
|
}
|
|
return mask;
|
|
}
|
|
|
|
static boolean is_unswizzled(struct i915_full_src_register *r,
|
|
unsigned write_mask)
|
|
{
|
|
if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
|
|
return FALSE;
|
|
if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
|
|
return FALSE;
|
|
if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
|
|
return FALSE;
|
|
if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
|
|
return FALSE;
|
|
return TRUE;
|
|
}
|
|
|
|
static boolean op_is_texture(unsigned opcode)
|
|
{
|
|
return op_table[opcode].is_texture;
|
|
}
|
|
|
|
static unsigned op_neutral_element(unsigned opcode)
|
|
{
|
|
unsigned ne = op_table[opcode].neutral_element;
|
|
if (!ne) {
|
|
debug_printf("No neutral element for opcode %d\n",opcode);
|
|
ne = TGSI_SWIZZLE_ZERO;
|
|
}
|
|
return ne;
|
|
}
|
|
|
|
/*
|
|
* Sets the swizzle to the neutral element for the operation for the bits
|
|
* of writemask which are set, swizzle to identity otherwise.
|
|
*/
|
|
static void set_neutral_element_swizzle(struct i915_full_src_register *r,
|
|
unsigned write_mask,
|
|
unsigned neutral)
|
|
{
|
|
if ( write_mask & TGSI_WRITEMASK_X )
|
|
r->Register.SwizzleX = neutral;
|
|
else
|
|
r->Register.SwizzleX = TGSI_SWIZZLE_X;
|
|
|
|
if ( write_mask & TGSI_WRITEMASK_Y )
|
|
r->Register.SwizzleY = neutral;
|
|
else
|
|
r->Register.SwizzleY = TGSI_SWIZZLE_Y;
|
|
|
|
if ( write_mask & TGSI_WRITEMASK_Z )
|
|
r->Register.SwizzleZ = neutral;
|
|
else
|
|
r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
|
|
|
|
if ( write_mask & TGSI_WRITEMASK_W )
|
|
r->Register.SwizzleW = neutral;
|
|
else
|
|
r->Register.SwizzleW = TGSI_SWIZZLE_W;
|
|
}
|
|
|
|
static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
|
|
{
|
|
o->File = i->File;
|
|
o->Indirect = i->Indirect;
|
|
o->Dimension = i->Dimension;
|
|
o->Index = i->Index;
|
|
o->SwizzleX = i->SwizzleX;
|
|
o->SwizzleY = i->SwizzleY;
|
|
o->SwizzleZ = i->SwizzleZ;
|
|
o->SwizzleW = i->SwizzleW;
|
|
o->Absolute = i->Absolute;
|
|
o->Negate = i->Negate;
|
|
}
|
|
|
|
static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
|
|
{
|
|
o->File = i->File;
|
|
o->WriteMask = i->WriteMask;
|
|
o->Indirect = i->Indirect;
|
|
o->Dimension = i->Dimension;
|
|
o->Index = i->Index;
|
|
}
|
|
|
|
static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i)
|
|
{
|
|
memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
|
|
memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
|
|
|
|
copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
|
|
|
|
copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
|
|
copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
|
|
copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
|
|
}
|
|
|
|
static void copy_token(union i915_full_token *o, union tgsi_full_token *i)
|
|
{
|
|
if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
|
|
memcpy(o, i, sizeof(*o));
|
|
else
|
|
copy_instruction(&o->FullInstruction, &i->FullInstruction);
|
|
|
|
}
|
|
|
|
static void liveness_mark_written(struct i915_optimize_context *ctx,
|
|
struct i915_full_dst_register *dst_reg,
|
|
int pos)
|
|
{
|
|
int dst_reg_index;
|
|
if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
|
|
dst_reg_index = dst_reg->Register.Index;
|
|
assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
|
|
/* dead -> live transition */
|
|
if (ctx->first_write[dst_reg_index] != -1)
|
|
ctx->first_write[dst_reg_index] = pos;
|
|
}
|
|
}
|
|
|
|
static void liveness_mark_read(struct i915_optimize_context *ctx,
|
|
struct i915_full_src_register *src_reg,
|
|
int pos)
|
|
{
|
|
int src_reg_index;
|
|
if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
|
|
src_reg_index = src_reg->Register.Index;
|
|
assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
|
|
/* live -> dead transition */
|
|
if (ctx->last_read[src_reg_index] != -1)
|
|
ctx->last_read[src_reg_index] = pos;
|
|
}
|
|
}
|
|
|
|
static void liveness_analysis(struct i915_optimize_context *ctx,
|
|
struct i915_token_list *tokens)
|
|
{
|
|
struct i915_full_dst_register *dst_reg;
|
|
struct i915_full_src_register *src_reg;
|
|
union i915_full_token *current;
|
|
unsigned opcode;
|
|
int num_dst, num_src;
|
|
int i = 0;
|
|
|
|
for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++)
|
|
{
|
|
ctx->first_write[i] = -1;
|
|
ctx->last_read[i] = -1;
|
|
}
|
|
|
|
for(i = 0; i < tokens->NumTokens; i++)
|
|
{
|
|
current = &tokens->Tokens[i];
|
|
|
|
if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
|
|
continue;
|
|
|
|
opcode = current->FullInstruction.Instruction.Opcode;
|
|
num_dst = op_num_dst(opcode);
|
|
|
|
switch(num_dst)
|
|
{
|
|
case 1:
|
|
dst_reg = ¤t->FullInstruction.Dst[0];
|
|
liveness_mark_written(ctx, dst_reg, i);
|
|
case 0:
|
|
break;
|
|
default:
|
|
debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
|
|
break;
|
|
}
|
|
}
|
|
|
|
for(i = tokens->NumTokens - 1; i >= 0; i--)
|
|
{
|
|
current = &tokens->Tokens[i];
|
|
|
|
if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
|
|
continue;
|
|
|
|
opcode = current->FullInstruction.Instruction.Opcode;
|
|
num_src = op_num_src(opcode);
|
|
|
|
switch(num_src)
|
|
{
|
|
case 3:
|
|
src_reg = ¤t->FullInstruction.Src[2];
|
|
liveness_mark_read(ctx, src_reg, i);
|
|
case 2:
|
|
src_reg = ¤t->FullInstruction.Src[1];
|
|
liveness_mark_read(ctx, src_reg, i);
|
|
case 1:
|
|
src_reg = ¤t->FullInstruction.Src[0];
|
|
liveness_mark_read(ctx, src_reg, i);
|
|
case 0:
|
|
break;
|
|
default:
|
|
debug_printf("Op %d has %d src regs\n", opcode, num_src);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from)
|
|
{
|
|
int dst_reg_index = dst_reg->Register.Index;
|
|
assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
|
|
return (from >= ctx->last_read[dst_reg_index]);
|
|
}
|
|
|
|
/* Returns a mask with the components used for a texture access instruction */
|
|
static unsigned i915_tex_mask(union i915_full_token *instr)
|
|
{
|
|
unsigned mask;
|
|
|
|
/* Get the number of coords */
|
|
mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture));
|
|
|
|
/* Add the W component if projective */
|
|
if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
|
|
mask |= TGSI_WRITEMASK_W;
|
|
|
|
return mask;
|
|
}
|
|
|
|
/**
 * Tell whether a TGSI texture target is one of the plain two-dimensional
 * targets (2D or RECT).
 */
static boolean target_is_texture2d(uint tex)
{
   if (tex == TGSI_TEXTURE_2D || tex == TGSI_TEXTURE_RECT)
      return true;

   return false;
}
|
|
|
|
|
|
/*
|
|
* Optimize away useless indirect texture reads:
|
|
* MOV TEMP[0].xy, IN[0].xyyy
|
|
* TEX TEMP[1], TEMP[0], SAMP[0], 2D
|
|
* into:
|
|
* TEX TEMP[1], IN[0], SAMP[0], 2D
|
|
*
|
|
* note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
|
|
*/
|
|
static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
|
|
struct i915_token_list *tokens,
|
|
int index)
|
|
{
|
|
union i915_full_token *current = &tokens->Tokens[index - 1];
|
|
union i915_full_token *next = &tokens->Tokens[index];
|
|
|
|
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
|
|
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
|
|
current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
|
|
op_is_texture(next->FullInstruction.Instruction.Opcode) &&
|
|
target_is_texture2d(next->FullInstruction.Texture.Texture) &&
|
|
same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) &&
|
|
is_unswizzled(¤t->FullInstruction.Src[0], i915_tex_mask(next)) &&
|
|
unused_from(ctx, ¤t->FullInstruction.Dst[0], index))
|
|
{
|
|
memcpy(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0], sizeof(struct i915_src_register));
|
|
current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Optimize away things like:
|
|
* MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
|
|
* MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
|
|
* into:
|
|
* NOP
|
|
* MOV OUT[0].xyw, TEMP[1].xyww
|
|
*/
|
|
static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next)
|
|
{
|
|
struct i915_full_src_register *src_reg1, *src_reg2;
|
|
struct i915_full_dst_register *dst_reg1, *dst_reg2;
|
|
unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
|
|
|
|
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
|
|
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
|
|
current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
|
|
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
|
|
current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
|
|
same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) &&
|
|
same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) &&
|
|
!same_src_dst_reg(¤t->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) )
|
|
{
|
|
src_reg1 = ¤t->FullInstruction.Src[0];
|
|
dst_reg1 = ¤t->FullInstruction.Dst[0];
|
|
src_reg2 = &next->FullInstruction.Src[0];
|
|
dst_reg2 = &next->FullInstruction.Dst[0];
|
|
|
|
/* Start with swizzles from the first mov */
|
|
swizzle_x = src_reg1->Register.SwizzleX;
|
|
swizzle_y = src_reg1->Register.SwizzleY;
|
|
swizzle_z = src_reg1->Register.SwizzleZ;
|
|
swizzle_w = src_reg1->Register.SwizzleW;
|
|
|
|
/* Pile the second mov on top */
|
|
if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
|
|
swizzle_x = src_reg2->Register.SwizzleX;
|
|
if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
|
|
swizzle_y = src_reg2->Register.SwizzleY;
|
|
if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
|
|
swizzle_z = src_reg2->Register.SwizzleZ;
|
|
if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
|
|
swizzle_w = src_reg2->Register.SwizzleW;
|
|
|
|
dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
|
|
src_reg2->Register.SwizzleX = swizzle_x;
|
|
src_reg2->Register.SwizzleY = swizzle_y;
|
|
src_reg2->Register.SwizzleZ = swizzle_z;
|
|
src_reg2->Register.SwizzleW = swizzle_w;
|
|
|
|
current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
|
|
|
|
return;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Optimize away things like:
|
|
* MUL OUT[0].xyz, TEMP[1], TEMP[2]
|
|
* MOV OUT[0].w, TEMP[2]
|
|
* into:
|
|
* MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
|
|
* This is useful for optimizing texenv.
|
|
*/
|
|
static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next)
|
|
{
|
|
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
|
|
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
|
|
op_commutes(current->FullInstruction.Instruction.Opcode) &&
|
|
current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
|
|
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
|
|
same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) &&
|
|
same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[1]) &&
|
|
!same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) &&
|
|
is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
|
|
is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
|
|
is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
|
|
{
|
|
next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
|
|
|
|
set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0);
|
|
set_neutral_element_swizzle(¤t->FullInstruction.Src[0],
|
|
next->FullInstruction.Dst[0].Register.WriteMask,
|
|
op_neutral_element(current->FullInstruction.Instruction.Opcode));
|
|
|
|
current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
|
|
next->FullInstruction.Dst[0].Register.WriteMask;
|
|
return;
|
|
}
|
|
|
|
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
|
|
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
|
|
op_commutes(current->FullInstruction.Instruction.Opcode) &&
|
|
current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
|
|
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
|
|
same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) &&
|
|
same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) &&
|
|
!same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) &&
|
|
is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
|
|
is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
|
|
is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
|
|
{
|
|
next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
|
|
|
|
set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0);
|
|
set_neutral_element_swizzle(¤t->FullInstruction.Src[1],
|
|
next->FullInstruction.Dst[0].Register.WriteMask,
|
|
op_neutral_element(current->FullInstruction.Instruction.Opcode));
|
|
|
|
current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
|
|
next->FullInstruction.Dst[0].Register.WriteMask;
|
|
return;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Optimize away things like:
|
|
* MOV TEMP[0].xyz TEMP[0].xyzx
|
|
* into:
|
|
* NOP
|
|
*/
|
|
static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
|
|
{
|
|
union i915_full_token current;
|
|
copy_token(¤t , tgsi_current);
|
|
if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
|
|
current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
|
|
op_has_dst(current.FullInstruction.Instruction.Opcode) &&
|
|
!current.FullInstruction.Instruction.Saturate &&
|
|
current.FullInstruction.Src[0].Register.Absolute == 0 &&
|
|
current.FullInstruction.Src[0].Register.Negate == 0 &&
|
|
is_unswizzled(¤t.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
|
|
same_src_dst_reg(¤t.FullInstruction.Src[0], ¤t.FullInstruction.Dst[0]) )
|
|
{
|
|
return TRUE;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
/*
|
|
* Optimize away things like:
|
|
* *** TEMP[0], TEMP[1], TEMP[2]
|
|
* MOV OUT[0] TEMP[0]
|
|
* into:
|
|
* *** OUT[0], TEMP[1], TEMP[2]
|
|
*/
|
|
static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
|
|
struct i915_token_list *tokens,
|
|
int index)
|
|
{
|
|
union i915_full_token *current = &tokens->Tokens[index - 1];
|
|
union i915_full_token *next = &tokens->Tokens[index];
|
|
|
|
// &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
|
|
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
|
|
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
|
|
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
|
|
op_has_dst(current->FullInstruction.Instruction.Opcode) &&
|
|
!next->FullInstruction.Instruction.Saturate &&
|
|
next->FullInstruction.Src[0].Register.Absolute == 0 &&
|
|
next->FullInstruction.Src[0].Register.Negate == 0 &&
|
|
unused_from(ctx, ¤t->FullInstruction.Dst[0], index) &&
|
|
current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW &&
|
|
is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
|
|
current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
|
|
same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) )
|
|
{
|
|
next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
|
|
|
|
current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
|
|
return;
|
|
}
|
|
}
|
|
|
|
struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
|
|
{
|
|
struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
|
|
struct tgsi_parse_context parse;
|
|
struct i915_optimize_context *ctx;
|
|
int i = 0;
|
|
|
|
ctx = malloc(sizeof(*ctx));
|
|
|
|
out_tokens->NumTokens = 0;
|
|
|
|
/* Count the tokens */
|
|
tgsi_parse_init( &parse, tokens );
|
|
while( !tgsi_parse_end_of_tokens( &parse ) ) {
|
|
tgsi_parse_token( &parse );
|
|
out_tokens->NumTokens++;
|
|
}
|
|
tgsi_parse_free (&parse);
|
|
|
|
/* Allocate our tokens */
|
|
out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
|
|
|
|
tgsi_parse_init( &parse, tokens );
|
|
while( !tgsi_parse_end_of_tokens( &parse ) ) {
|
|
tgsi_parse_token( &parse );
|
|
|
|
if (i915_fpc_useless_mov(&parse.FullToken)) {
|
|
out_tokens->NumTokens--;
|
|
continue;
|
|
}
|
|
|
|
copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
|
|
|
|
i++;
|
|
}
|
|
tgsi_parse_free (&parse);
|
|
|
|
liveness_analysis(ctx, out_tokens);
|
|
|
|
i = 1;
|
|
while( i < out_tokens->NumTokens) {
|
|
i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
|
|
i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
|
|
i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
|
|
i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
|
|
i++;
|
|
}
|
|
|
|
free(ctx);
|
|
|
|
return out_tokens;
|
|
}
|
|
|
|
void i915_optimize_free(struct i915_token_list *tokens)
|
|
{
|
|
free(tokens->Tokens);
|
|
free(tokens);
|
|
}
|
|
|
|
|