gallium: remove TGSI opcode XPD
Use MUL+MAD+MOV instead. Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
@@ -858,61 +858,6 @@ static void fmin_emit(
|
||||
emit_data->args[1], emit_data->args[0], "");
|
||||
}
|
||||
|
||||
/* TGSI_OPCODE_XPD */

/**
 * Fetch the operands of an XPD instruction.
 *
 * Simply forwards to dp_fetch_args() asking for three components; the
 * emit callback then reads src0.xyz from args[0..2] and src1.xyz from
 * args[3..5].
 */
static void
xpd_fetch_args(struct lp_build_tgsi_context *bld_base,
               struct lp_build_emit_data *emit_data)
{
   dp_fetch_args(bld_base, emit_data, 3);
}
|
||||
|
||||
/**
|
||||
* (a * b) - (c * d)
|
||||
*/
|
||||
static LLVMValueRef
|
||||
xpd_helper(
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef c,
|
||||
LLVMValueRef d)
|
||||
{
|
||||
LLVMValueRef tmp0, tmp1;
|
||||
|
||||
tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b);
|
||||
tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d);
|
||||
|
||||
return lp_build_sub(&bld_base->base, tmp0, tmp1);
|
||||
}
|
||||
|
||||
static void
|
||||
xpd_emit(
|
||||
const struct lp_build_tgsi_action * action,
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base,
|
||||
emit_data->args[1] /* src0.y */, emit_data->args[5] /* src1.z */,
|
||||
emit_data->args[4] /* src1.y */, emit_data->args[2] /* src0.z */);
|
||||
|
||||
emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base,
|
||||
emit_data->args[2] /* src0.z */, emit_data->args[3] /* src1.x */,
|
||||
emit_data->args[5] /* src1.z */, emit_data->args[0] /* src0.x */);
|
||||
|
||||
emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base,
|
||||
emit_data->args[0] /* src0.x */, emit_data->args[4] /* src1.y */,
|
||||
emit_data->args[3] /* src1.x */, emit_data->args[1] /* src0.y */);
|
||||
|
||||
emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
|
||||
}
|
||||
|
||||
const struct lp_build_tgsi_action xpd_action = {
|
||||
xpd_fetch_args, /* fetch_args */
|
||||
xpd_emit /* emit */
|
||||
};
|
||||
|
||||
/* TGSI_OPCODE_D2F */
|
||||
static void
|
||||
d2f_emit(
|
||||
@@ -1252,7 +1197,6 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
|
||||
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
|
||||
|
||||
bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
|
||||
bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
|
||||
|
@@ -590,9 +590,6 @@ lp_emit_instruction_aos(
|
||||
dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_XPD:
|
||||
return FALSE;
|
||||
|
||||
case TGSI_OPCODE_COS:
|
||||
src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
|
||||
tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
|
||||
|
@@ -985,21 +985,6 @@ ttn_sgt(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
||||
ttn_move_dest(b, dest, nir_slt(b, src[1], src[0]));
|
||||
}
|
||||
|
||||
static void
|
||||
ttn_xpd(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
||||
{
|
||||
ttn_move_dest_masked(b, dest,
|
||||
nir_fsub(b,
|
||||
nir_fmul(b,
|
||||
ttn_swizzle(b, src[0], Y, Z, X, X),
|
||||
ttn_swizzle(b, src[1], Z, X, Y, X)),
|
||||
nir_fmul(b,
|
||||
ttn_swizzle(b, src[1], Y, Z, X, X),
|
||||
ttn_swizzle(b, src[0], Z, X, Y, X))),
|
||||
TGSI_WRITEMASK_XYZ);
|
||||
ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
|
||||
}
|
||||
|
||||
static void
|
||||
ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
||||
{
|
||||
@@ -1526,7 +1511,6 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
|
||||
[TGSI_OPCODE_EX2] = nir_op_fexp2,
|
||||
[TGSI_OPCODE_LG2] = nir_op_flog2,
|
||||
[TGSI_OPCODE_POW] = nir_op_fpow,
|
||||
[TGSI_OPCODE_XPD] = 0,
|
||||
[TGSI_OPCODE_COS] = nir_op_fcos,
|
||||
[TGSI_OPCODE_DDX] = nir_op_fddx,
|
||||
[TGSI_OPCODE_DDY] = nir_op_fddy,
|
||||
@@ -1739,10 +1723,6 @@ ttn_emit_instruction(struct ttn_compile *c)
|
||||
ttn_lit(b, op_trans[tgsi_op], dest, src);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_XPD:
|
||||
ttn_xpd(b, op_trans[tgsi_op], dest, src);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DP2:
|
||||
ttn_dp2(b, op_trans[tgsi_op], dest, src);
|
||||
break;
|
||||
|
@@ -3312,51 +3312,6 @@ exec_scs(struct tgsi_exec_machine *mach,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exec_xpd(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
union tgsi_exec_channel r[6];
|
||||
union tgsi_exec_channel d[3];
|
||||
|
||||
fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
|
||||
fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
|
||||
|
||||
micro_mul(&r[2], &r[0], &r[1]);
|
||||
|
||||
fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
|
||||
fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
|
||||
|
||||
micro_mul(&r[5], &r[3], &r[4] );
|
||||
micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]);
|
||||
|
||||
fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
|
||||
|
||||
micro_mul(&r[3], &r[3], &r[2]);
|
||||
|
||||
fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
|
||||
|
||||
micro_mul(&r[1], &r[1], &r[5]);
|
||||
micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]);
|
||||
|
||||
micro_mul(&r[5], &r[5], &r[4]);
|
||||
micro_mul(&r[0], &r[0], &r[2]);
|
||||
micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]);
|
||||
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
|
||||
store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
|
||||
}
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
|
||||
store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
|
||||
}
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
|
||||
store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
|
||||
}
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
|
||||
store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exec_dst(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
@@ -5153,10 +5108,6 @@ exec_instruction(
|
||||
exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_XPD:
|
||||
exec_xpd(mach, inst);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_COS:
|
||||
exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
|
||||
break;
|
||||
|
@@ -68,7 +68,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "", 31 }, /* removed */
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 },
|
||||
{ 1, 0, 0, 0, 0, 0, 0, OTHR, "CLOCK", TGSI_OPCODE_CLOCK },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
|
||||
|
@@ -258,65 +258,6 @@ transform_dst(struct tgsi_transform_context *tctx,
|
||||
}
|
||||
}
|
||||
|
||||
/* XPD - Cross Product
|
||||
* dst.x = src0.y \times src1.z - src1.y \times src0.z
|
||||
* dst.y = src0.z \times src1.x - src1.z \times src0.x
|
||||
* dst.z = src0.x \times src1.y - src1.x \times src0.y
|
||||
* dst.w = 1.0
|
||||
*
|
||||
* ; needs: 1 tmp, imm{1.0}
|
||||
* MUL tmpA.xyz, src1.yzx, src0.zxy
|
||||
* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz
|
||||
* MOV dst.w, imm{1.0}
|
||||
*/
|
||||
#define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2))
|
||||
#define XPD_TMP 1
|
||||
static void
|
||||
transform_xpd(struct tgsi_transform_context *tctx,
|
||||
struct tgsi_full_instruction *inst)
|
||||
{
|
||||
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
|
||||
struct tgsi_full_dst_register *dst = &inst->Dst[0];
|
||||
struct tgsi_full_src_register *src0 = &inst->Src[0];
|
||||
struct tgsi_full_src_register *src1 = &inst->Src[1];
|
||||
struct tgsi_full_instruction new_inst;
|
||||
|
||||
if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
|
||||
/* MUL tmpA.xyz, src1.yzx, src0.zxy */
|
||||
new_inst = tgsi_default_full_instruction();
|
||||
new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
|
||||
new_inst.Instruction.NumDstRegs = 1;
|
||||
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
|
||||
new_inst.Instruction.NumSrcRegs = 2;
|
||||
reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
|
||||
reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
|
||||
/* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */
|
||||
new_inst = tgsi_default_full_instruction();
|
||||
new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
|
||||
new_inst.Instruction.NumDstRegs = 1;
|
||||
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
|
||||
new_inst.Instruction.NumSrcRegs = 3;
|
||||
reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
|
||||
reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
|
||||
reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
|
||||
new_inst.Src[2].Register.Negate = true;
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
}
|
||||
|
||||
if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
|
||||
/* MOV dst.w, imm{1.0} */
|
||||
new_inst = tgsi_default_full_instruction();
|
||||
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
|
||||
new_inst.Instruction.NumDstRegs = 1;
|
||||
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
|
||||
new_inst.Instruction.NumSrcRegs = 1;
|
||||
reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
}
|
||||
}
|
||||
|
||||
/* SCS - Sine Cosine
|
||||
* dst.x = \cos{src.x}
|
||||
* dst.y = \sin{src.x}
|
||||
@@ -1466,11 +1407,6 @@ transform_instr(struct tgsi_transform_context *tctx,
|
||||
goto skip;
|
||||
transform_dst(tctx, inst);
|
||||
break;
|
||||
case TGSI_OPCODE_XPD:
|
||||
if (!ctx->config->lower_XPD)
|
||||
goto skip;
|
||||
transform_xpd(tctx, inst);
|
||||
break;
|
||||
case TGSI_OPCODE_SCS:
|
||||
if (!ctx->config->lower_SCS)
|
||||
goto skip;
|
||||
@@ -1599,7 +1535,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
|
||||
#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
|
||||
/* if there are no instructions to lower, then we are done: */
|
||||
if (!(OPCS(DST) ||
|
||||
OPCS(XPD) ||
|
||||
OPCS(SCS) ||
|
||||
OPCS(LRP) ||
|
||||
OPCS(FRC) ||
|
||||
@@ -1629,10 +1564,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
|
||||
newlen += DST_GROW * OPCS(DST);
|
||||
numtmp = MAX2(numtmp, DST_TMP);
|
||||
}
|
||||
if (OPCS(XPD)) {
|
||||
newlen += XPD_GROW * OPCS(XPD);
|
||||
numtmp = MAX2(numtmp, XPD_TMP);
|
||||
}
|
||||
if (OPCS(SCS)) {
|
||||
newlen += SCS_GROW * OPCS(SCS);
|
||||
numtmp = MAX2(numtmp, SCS_TMP);
|
||||
|
@@ -55,7 +55,6 @@ struct tgsi_lowering_config
|
||||
* enable lowering of TGSI_OPCODE_<opc>
|
||||
*/
|
||||
unsigned lower_DST:1;
|
||||
unsigned lower_XPD:1;
|
||||
unsigned lower_SCS:1;
|
||||
unsigned lower_LRP:1;
|
||||
unsigned lower_FRC:1;
|
||||
|
@@ -63,7 +63,6 @@ OP11(ROUND)
|
||||
OP11(EX2)
|
||||
OP11(LG2)
|
||||
OP12(POW)
|
||||
OP12(XPD)
|
||||
OP11(COS)
|
||||
OP11(DDX)
|
||||
OP11(DDY)
|
||||
|
@@ -350,18 +350,6 @@ This instruction replicates its result.
|
||||
|
||||
dst = src0.x^{src1.x}
|
||||
|
||||
.. opcode:: XPD - Cross Product
|
||||
|
||||
.. math::
|
||||
|
||||
dst.x = src0.y \times src1.z - src1.y \times src0.z
|
||||
|
||||
dst.y = src0.z \times src1.x - src1.z \times src0.x
|
||||
|
||||
dst.z = src0.x \times src1.y - src1.x \times src0.y
|
||||
|
||||
dst.w = 1
|
||||
|
||||
|
||||
.. opcode:: COS - Cosine
|
||||
|
||||
@@ -3663,7 +3651,7 @@ of the operands are equal to 0. That means that 0 * Inf = 0. This
|
||||
should be set the same way for an entire pipeline. Note that this
|
||||
applies not only to the literal MUL TGSI opcode, but all FP32
|
||||
multiplications implied by other operations, such as MAD, FMA, DP2,
|
||||
DP3, DP4, DST, LOG, LRP, XPD, and possibly others. If there is a
|
||||
DP3, DP4, DST, LOG, LRP, and possibly others. If there is a
|
||||
mismatch between shaders, then it is unspecified whether this behavior
|
||||
will be enabled.
|
||||
|
||||
|
@@ -2317,7 +2317,6 @@ etna_compile_shader(struct etna_shader_variant *v)
|
||||
.lower_LOG = true,
|
||||
.lower_DP2 = true,
|
||||
.lower_TRUNC = true,
|
||||
.lower_XPD = true
|
||||
};
|
||||
|
||||
c = CALLOC_STRUCT(etna_compile);
|
||||
|
@@ -118,7 +118,6 @@ static const struct {
|
||||
[ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 },
|
||||
[ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 },
|
||||
[ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 },
|
||||
[ TGSI_OPCODE_XPD ] = { false, false, 0, 1, 2 },
|
||||
};
|
||||
|
||||
static boolean op_has_dst(unsigned opcode)
|
||||
|
@@ -1027,32 +1027,6 @@ i915_translate_instruction(struct i915_fp_compile *p,
|
||||
emit_tex(p, inst, T0_TEXLDP, fs);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_XPD:
|
||||
/* Cross product:
|
||||
* result.x = src0.y * src1.z - src0.z * src1.y;
|
||||
* result.y = src0.z * src1.x - src0.x * src1.z;
|
||||
* result.z = src0.x * src1.y - src0.y * src1.x;
|
||||
* result.w = undef;
|
||||
*/
|
||||
src0 = src_vector(p, &inst->Src[0], fs);
|
||||
src1 = src_vector(p, &inst->Src[1], fs);
|
||||
tmp = i915_get_utemp(p);
|
||||
|
||||
i915_emit_arith(p,
|
||||
A0_MUL,
|
||||
tmp, A0_DEST_CHANNEL_ALL, 0,
|
||||
swizzle(src0, Z, X, Y, ONE),
|
||||
swizzle(src1, Y, Z, X, ONE), 0);
|
||||
|
||||
i915_emit_arith(p,
|
||||
A0_MAD,
|
||||
get_result_vector(p, &inst->Dst[0]),
|
||||
get_result_flags(inst), 0,
|
||||
swizzle(src0, Y, Z, X, ONE),
|
||||
swizzle(src1, Z, X, Y, ONE),
|
||||
negate(tmp, 1, 1, 1, 0));
|
||||
break;
|
||||
|
||||
default:
|
||||
i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
|
||||
p->error = 1;
|
||||
|
@@ -346,14 +346,6 @@ unsigned int Instruction::srcMask(unsigned int s) const
|
||||
return mask;
|
||||
case TGSI_OPCODE_TXQ:
|
||||
return 1;
|
||||
case TGSI_OPCODE_XPD:
|
||||
{
|
||||
unsigned int x = 0;
|
||||
if (mask & 1) x |= 0x6;
|
||||
if (mask & 2) x |= 0x5;
|
||||
if (mask & 4) x |= 0x3;
|
||||
return x;
|
||||
}
|
||||
case TGSI_OPCODE_D2I:
|
||||
case TGSI_OPCODE_D2U:
|
||||
case TGSI_OPCODE_D2F:
|
||||
@@ -3347,25 +3339,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
||||
case TGSI_OPCODE_LIT:
|
||||
handleLIT(dst0);
|
||||
break;
|
||||
case TGSI_OPCODE_XPD:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
if (c < 3) {
|
||||
val0 = getSSA();
|
||||
src0 = fetchSrc(1, (c + 1) % 3);
|
||||
src1 = fetchSrc(0, (c + 2) % 3);
|
||||
mkOp2(OP_MUL, TYPE_F32, val0, src0, src1)
|
||||
->dnz = info->io.mul_zero_wins;
|
||||
mkOp1(OP_NEG, TYPE_F32, val0, val0);
|
||||
|
||||
src0 = fetchSrc(0, (c + 1) % 3);
|
||||
src1 = fetchSrc(1, (c + 2) % 3);
|
||||
mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0)
|
||||
->dnz = info->io.mul_zero_wins;
|
||||
} else {
|
||||
loadImm(dst0[c], 1.0f);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_ISSG:
|
||||
case TGSI_OPCODE_SSG:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
|
@@ -10,7 +10,6 @@
|
||||
* POW - EX2 + MUL + LG2
|
||||
* SUB - ADD, second source negated
|
||||
* SWZ - MOV
|
||||
* XPD -
|
||||
*
|
||||
* Register access
|
||||
* - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
|
||||
|
@@ -774,11 +774,6 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
|
||||
case TGSI_OPCODE_TXP:
|
||||
nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
|
||||
break;
|
||||
case TGSI_OPCODE_XPD:
|
||||
tmp = nvfx_src(temp(fpc));
|
||||
nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
|
||||
nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_IF:
|
||||
// MOVRC0 R31 (TR0.xyzw), R<src>:
|
||||
|
@@ -164,7 +164,6 @@
|
||||
* RSQ - LG2 + EX2
|
||||
* POW - LG2 + MUL + EX2
|
||||
* SCS - COS + SIN
|
||||
* XPD
|
||||
*
|
||||
* NV40 Looping
|
||||
* Loops appear to be fairly expensive on NV40 at least, the proprietary
|
||||
|
@@ -683,11 +683,6 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
|
||||
insn.cc_test = NVFX_COND_LT;
|
||||
nvfx_vp_emit(vpc, insn);
|
||||
break;
|
||||
case TGSI_OPCODE_XPD:
|
||||
tmp = nvfx_src(temp(vpc));
|
||||
nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
|
||||
nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
|
||||
break;
|
||||
case TGSI_OPCODE_IF:
|
||||
insn = arith(0, VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none);
|
||||
insn.cc_update = 1;
|
||||
|
@@ -57,7 +57,6 @@ static unsigned translate_opcode(unsigned opcode)
|
||||
case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
|
||||
case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
|
||||
case TGSI_OPCODE_POW: return RC_OPCODE_POW;
|
||||
case TGSI_OPCODE_XPD: return RC_OPCODE_XPD;
|
||||
case TGSI_OPCODE_COS: return RC_OPCODE_COS;
|
||||
case TGSI_OPCODE_DDX: return RC_OPCODE_DDX;
|
||||
case TGSI_OPCODE_DDY: return RC_OPCODE_DDY;
|
||||
|
@@ -7844,78 +7844,6 @@ static int tgsi_ucmp(struct r600_shader_ctx *ctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tgsi_xpd(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
static const unsigned int src0_swizzle[] = {2, 0, 1};
|
||||
static const unsigned int src1_swizzle[] = {1, 2, 0};
|
||||
struct r600_bytecode_alu alu;
|
||||
uint32_t use_temp = 0;
|
||||
int i, r;
|
||||
|
||||
if (inst->Dst[0].Register.WriteMask != 0xf)
|
||||
use_temp = 1;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP2_MUL;
|
||||
if (i < 3) {
|
||||
r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
|
||||
r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
|
||||
} else {
|
||||
alu.src[0].sel = V_SQ_ALU_SRC_0;
|
||||
alu.src[0].chan = i;
|
||||
alu.src[1].sel = V_SQ_ALU_SRC_0;
|
||||
alu.src[1].chan = i;
|
||||
}
|
||||
|
||||
alu.dst.sel = ctx->temp_reg;
|
||||
alu.dst.chan = i;
|
||||
alu.dst.write = 1;
|
||||
|
||||
if (i == 3)
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP3_MULADD;
|
||||
|
||||
if (i < 3) {
|
||||
r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
|
||||
r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
|
||||
} else {
|
||||
alu.src[0].sel = V_SQ_ALU_SRC_0;
|
||||
alu.src[0].chan = i;
|
||||
alu.src[1].sel = V_SQ_ALU_SRC_0;
|
||||
alu.src[1].chan = i;
|
||||
}
|
||||
|
||||
alu.src[2].sel = ctx->temp_reg;
|
||||
alu.src[2].neg = 1;
|
||||
alu.src[2].chan = i;
|
||||
|
||||
if (use_temp)
|
||||
alu.dst.sel = ctx->temp_reg;
|
||||
else
|
||||
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
|
||||
alu.dst.chan = i;
|
||||
alu.dst.write = 1;
|
||||
alu.is_op3 = 1;
|
||||
if (i == 3)
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
if (use_temp)
|
||||
return tgsi_helper_copy(ctx, inst);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tgsi_exp(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
@@ -9092,7 +9020,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
|
||||
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
|
||||
[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
|
||||
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
|
||||
[TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
|
||||
[31] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[32] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[33] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[34] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
@@ -9290,7 +9218,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
|
||||
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
|
||||
[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
|
||||
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
|
||||
[TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
|
||||
[31] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[32] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[33] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[34] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
@@ -9513,7 +9441,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
|
||||
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
|
||||
[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
|
||||
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow},
|
||||
[TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
|
||||
[31] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[32] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[33] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[34] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
|
@@ -2195,63 +2195,6 @@ emit_pow(struct svga_shader_emitter *emit,
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Translate/emit TGSI XPD (vector cross product) instruction.
|
||||
*/
|
||||
static boolean
|
||||
emit_xpd(struct svga_shader_emitter *emit,
|
||||
const struct tgsi_full_instruction *insn)
|
||||
{
|
||||
SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
|
||||
const struct src_register src0 = translate_src_register(
|
||||
emit, &insn->Src[0] );
|
||||
const struct src_register src1 = translate_src_register(
|
||||
emit, &insn->Src[1] );
|
||||
boolean need_dst_tmp = FALSE;
|
||||
|
||||
/* XPD can only output to a temporary */
|
||||
if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
|
||||
need_dst_tmp = TRUE;
|
||||
|
||||
/* The dst reg must not be the same as src0 or src1*/
|
||||
if (alias_src_dst(src0, dst) ||
|
||||
alias_src_dst(src1, dst))
|
||||
need_dst_tmp = TRUE;
|
||||
|
||||
if (need_dst_tmp) {
|
||||
SVGA3dShaderDestToken tmp = get_temp( emit );
|
||||
|
||||
/* Obey DX9 restrictions on mask:
|
||||
*/
|
||||
tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
|
||||
|
||||
if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
|
||||
return FALSE;
|
||||
|
||||
if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
|
||||
return FALSE;
|
||||
}
|
||||
else {
|
||||
if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Need to emit 1.0 to dst.w?
|
||||
*/
|
||||
if (dst.mask & TGSI_WRITEMASK_W) {
|
||||
struct src_register one = get_one_immediate( emit );
|
||||
|
||||
if (!submit_op1(emit,
|
||||
inst_token( SVGA3DOP_MOV ),
|
||||
writemask(dst, TGSI_WRITEMASK_W),
|
||||
one))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit a LRP (linear interpolation) instruction.
|
||||
*/
|
||||
@@ -2986,9 +2929,6 @@ svga_emit_instruction(struct svga_shader_emitter *emit,
|
||||
case TGSI_OPCODE_BRK:
|
||||
return emit_brk( emit, insn );
|
||||
|
||||
case TGSI_OPCODE_XPD:
|
||||
return emit_xpd( emit, insn );
|
||||
|
||||
case TGSI_OPCODE_KILL:
|
||||
return emit_kill( emit, insn );
|
||||
|
||||
@@ -3604,7 +3544,6 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
|
||||
emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
|
||||
emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
|
||||
emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
|
||||
emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
|
||||
emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1)
|
||||
return TRUE;
|
||||
|
||||
|
@@ -5210,117 +5210,6 @@ emit_txp(struct svga_shader_emitter_v10 *emit,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Emit code for TGSI_OPCODE_XPD instruction.
|
||||
*/
|
||||
static boolean
|
||||
emit_xpd(struct svga_shader_emitter_v10 *emit,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
/* dst.x = src0.y * src1.z - src1.y * src0.z
|
||||
* dst.y = src0.z * src1.x - src1.z * src0.x
|
||||
* dst.z = src0.x * src1.y - src1.x * src0.y
|
||||
* dst.w = 1
|
||||
*/
|
||||
struct tgsi_full_src_register s0_xxxx =
|
||||
scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
|
||||
struct tgsi_full_src_register s0_yyyy =
|
||||
scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
|
||||
struct tgsi_full_src_register s0_zzzz =
|
||||
scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
|
||||
|
||||
struct tgsi_full_src_register s1_xxxx =
|
||||
scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
|
||||
struct tgsi_full_src_register s1_yyyy =
|
||||
scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
|
||||
struct tgsi_full_src_register s1_zzzz =
|
||||
scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
|
||||
|
||||
unsigned tmp1 = get_temp_index(emit);
|
||||
struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
|
||||
struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
|
||||
|
||||
unsigned tmp2 = get_temp_index(emit);
|
||||
struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
|
||||
struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
|
||||
struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
|
||||
|
||||
unsigned tmp3 = get_temp_index(emit);
|
||||
struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
|
||||
struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
|
||||
struct tgsi_full_dst_register tmp3_dst_x =
|
||||
writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
|
||||
struct tgsi_full_dst_register tmp3_dst_y =
|
||||
writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
|
||||
struct tgsi_full_dst_register tmp3_dst_z =
|
||||
writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
|
||||
struct tgsi_full_dst_register tmp3_dst_w =
|
||||
writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
|
||||
|
||||
/* Note: we put all the intermediate computations into tmp3 in case
|
||||
* the XPD dest register is that same as one of the src regs (in which
|
||||
* case we could clobber a src reg before we're done with it) .
|
||||
*
|
||||
* Note: we could get by with just one temp register instead of three
|
||||
* since we're doing scalar operations and there's enough room in one
|
||||
* temp for everything.
|
||||
*/
|
||||
|
||||
/* MUL tmp1, src0.y, src1.z */
|
||||
/* MUL tmp2, src1.y, src0.z */
|
||||
/* ADD tmp3.x, tmp1, -tmp2 */
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
|
||||
emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
|
||||
&s0_yyyy, &s1_zzzz, FALSE);
|
||||
emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
|
||||
&s1_yyyy, &s0_zzzz, FALSE);
|
||||
emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
|
||||
&tmp1_src, &neg_tmp2_src, FALSE);
|
||||
}
|
||||
|
||||
/* MUL tmp1, src0.z, src1.x */
|
||||
/* MUL tmp2, src1.z, src0.x */
|
||||
/* ADD tmp3.y, tmp1, -tmp2 */
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
|
||||
emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
|
||||
&s1_xxxx, FALSE);
|
||||
emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
|
||||
&s0_xxxx, FALSE);
|
||||
emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
|
||||
&tmp1_src, &neg_tmp2_src, FALSE);
|
||||
}
|
||||
|
||||
/* MUL tmp1, src0.x, src1.y */
|
||||
/* MUL tmp2, src1.x, src0.y */
|
||||
/* ADD tmp3.z, tmp1, -tmp2 */
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
|
||||
emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
|
||||
&s1_yyyy, FALSE);
|
||||
emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
|
||||
&s0_yyyy, FALSE);
|
||||
emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
|
||||
&tmp1_src, &neg_tmp2_src, FALSE);
|
||||
}
|
||||
|
||||
/* MOV tmp3.w, 1.0 */
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
|
||||
struct tgsi_full_src_register one =
|
||||
make_immediate_reg_float(emit, 1.0f);
|
||||
|
||||
emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
|
||||
}
|
||||
|
||||
/* MOV dst, tmp3 */
|
||||
emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
|
||||
inst->Instruction.Saturate);
|
||||
|
||||
|
||||
free_temp_indexes(emit);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit code for TGSI_OPCODE_TXD (explicit derivatives)
|
||||
*/
|
||||
@@ -5742,8 +5631,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
|
||||
return emit_txq(emit, inst);
|
||||
case TGSI_OPCODE_UIF:
|
||||
return emit_if(emit, inst);
|
||||
case TGSI_OPCODE_XPD:
|
||||
return emit_xpd(emit, inst);
|
||||
case TGSI_OPCODE_UMUL_HI:
|
||||
case TGSI_OPCODE_IMUL_HI:
|
||||
case TGSI_OPCODE_UDIV:
|
||||
|
@@ -368,7 +368,7 @@ struct tgsi_property_data {
|
||||
#define TGSI_OPCODE_EX2 28
|
||||
#define TGSI_OPCODE_LG2 29
|
||||
#define TGSI_OPCODE_POW 30
|
||||
#define TGSI_OPCODE_XPD 31
|
||||
/* gap */
|
||||
#define TGSI_OPCODE_U2I64 32
|
||||
#define TGSI_OPCODE_CLOCK 33
|
||||
#define TGSI_OPCODE_I2I64 34
|
||||
|
@@ -1588,6 +1588,29 @@ DECL_SPECIAL(ABS)
|
||||
return D3D_OK;
|
||||
}
|
||||
|
||||
DECL_SPECIAL(XPD)
|
||||
{
|
||||
struct ureg_program *ureg = tx->ureg;
|
||||
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
|
||||
struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
|
||||
struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
|
||||
|
||||
ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
|
||||
ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
|
||||
TGSI_SWIZZLE_X, 0),
|
||||
ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
|
||||
TGSI_SWIZZLE_Y, 0));
|
||||
ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
|
||||
ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
|
||||
TGSI_SWIZZLE_Y, 0),
|
||||
ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
|
||||
TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
|
||||
ureg_src(dst));
|
||||
ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
|
||||
ureg_imm1f(ureg, 1));
|
||||
return D3D_OK;
|
||||
}
|
||||
|
||||
DECL_SPECIAL(M4x4)
|
||||
{
|
||||
return NineTranslateInstruction_Mkxn(tx, 4, 4);
|
||||
@@ -2915,7 +2938,7 @@ struct sm1_op_info inst_table[] =
|
||||
_OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
|
||||
|
||||
_OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
|
||||
_OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
|
||||
_OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
|
||||
_OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
|
||||
_OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
|
||||
_OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
|
||||
|
@@ -505,8 +505,6 @@ translate_opcode( unsigned op )
|
||||
return TGSI_OPCODE_TXB;
|
||||
case OPCODE_TXP:
|
||||
return TGSI_OPCODE_TXP;
|
||||
case OPCODE_XPD:
|
||||
return TGSI_OPCODE_XPD;
|
||||
case OPCODE_END:
|
||||
return TGSI_OPCODE_END;
|
||||
default:
|
||||
@@ -568,11 +566,17 @@ compile_instruction(
|
||||
break;
|
||||
|
||||
case OPCODE_XPD:
|
||||
dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
|
||||
ureg_insn( ureg,
|
||||
translate_opcode( inst->Opcode ),
|
||||
dst, num_dst,
|
||||
src, num_src, 0 );
|
||||
ureg_MUL(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ),
|
||||
ureg_swizzle(src[0], TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
|
||||
TGSI_SWIZZLE_X, 0),
|
||||
ureg_swizzle(src[1], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
|
||||
TGSI_SWIZZLE_Y, 0));
|
||||
ureg_MAD(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ),
|
||||
ureg_swizzle(src[0], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
|
||||
TGSI_SWIZZLE_Y, 0),
|
||||
ureg_negate(ureg_swizzle(src[1], TGSI_SWIZZLE_Y,
|
||||
TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
|
||||
ureg_src(dst[0]));
|
||||
break;
|
||||
|
||||
case OPCODE_RSQ:
|
||||
|
Reference in New Issue
Block a user