gallium: remove TGSI opcode DPH

Use DP4 (with src0.w set to 1) or DP3 + ADD (adding src1.w) instead.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
@@ -234,22 +234,6 @@ static struct lp_build_tgsi_action dp4_action = {
|
||||
dp4_emit /* emit */
|
||||
};
|
||||
|
||||
/* TGSI_OPCODE_DPH */
|
||||
static void
|
||||
dph_fetch_args(
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
dp_fetch_args(bld_base, emit_data, 4);
|
||||
/* src0.w */
|
||||
emit_data->args[3] = bld_base->base.one;
|
||||
}
|
||||
|
||||
const struct lp_build_tgsi_action dph_action = {
|
||||
dph_fetch_args, /* fetch_args */
|
||||
dp4_emit /* emit */
|
||||
};
|
||||
|
||||
/* TGSI_OPCODE_DST */
|
||||
static void
|
||||
dst_fetch_args(
|
||||
@@ -1258,7 +1242,6 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
|
||||
bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
|
||||
|
@@ -593,9 +593,6 @@ lp_emit_instruction_aos(
|
||||
case TGSI_OPCODE_XPD:
|
||||
return FALSE;
|
||||
|
||||
case TGSI_OPCODE_DPH:
|
||||
return FALSE;
|
||||
|
||||
case TGSI_OPCODE_COS:
|
||||
src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
|
||||
tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
|
||||
|
@@ -1018,13 +1018,6 @@ ttn_dp4(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
||||
ttn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
|
||||
}
|
||||
|
||||
static void
|
||||
ttn_dph(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
||||
{
|
||||
ttn_move_dest(b, dest, nir_fadd(b, nir_fdot3(b, src[0], src[1]),
|
||||
ttn_channel(b, src[1], W)));
|
||||
}
|
||||
|
||||
static void
|
||||
ttn_umad(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
||||
{
|
||||
@@ -1534,7 +1527,6 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
|
||||
[TGSI_OPCODE_LG2] = nir_op_flog2,
|
||||
[TGSI_OPCODE_POW] = nir_op_fpow,
|
||||
[TGSI_OPCODE_XPD] = 0,
|
||||
[TGSI_OPCODE_DPH] = 0,
|
||||
[TGSI_OPCODE_COS] = nir_op_fcos,
|
||||
[TGSI_OPCODE_DDX] = nir_op_fddx,
|
||||
[TGSI_OPCODE_DDY] = nir_op_fddy,
|
||||
@@ -1763,10 +1755,6 @@ ttn_emit_instruction(struct ttn_compile *c)
|
||||
ttn_dp4(b, op_trans[tgsi_op], dest, src);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DPH:
|
||||
ttn_dph(b, op_trans[tgsi_op], dest, src);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_UMAD:
|
||||
ttn_umad(b, op_trans[tgsi_op], dest, src);
|
||||
break;
|
||||
|
@@ -3184,35 +3184,6 @@ exec_dp4(struct tgsi_exec_machine *mach,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exec_dph(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
unsigned int chan;
|
||||
union tgsi_exec_channel arg[3];
|
||||
|
||||
fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
|
||||
fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
|
||||
micro_mul(&arg[2], &arg[0], &arg[1]);
|
||||
|
||||
fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
|
||||
fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
|
||||
micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
|
||||
|
||||
fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
|
||||
fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
|
||||
micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
|
||||
|
||||
fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
|
||||
micro_add(&arg[0], &arg[0], &arg[1]);
|
||||
|
||||
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
|
||||
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
|
||||
store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exec_dp2(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
@@ -5186,10 +5157,6 @@ exec_instruction(
|
||||
exec_xpd(mach, inst);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DPH:
|
||||
exec_dph(mach, inst);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_COS:
|
||||
exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
|
||||
break;
|
||||
|
@@ -72,7 +72,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 },
|
||||
{ 1, 0, 0, 0, 0, 0, 0, OTHR, "CLOCK", TGSI_OPCODE_CLOCK },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, REPL, "", 35 }, /* removed */
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
|
||||
|
@@ -914,9 +914,6 @@ transform_log(struct tgsi_transform_context *tctx,
|
||||
* DP3 - 3-component Dot Product
|
||||
* dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
|
||||
*
|
||||
* DPH - Homogeneous Dot Product
|
||||
* dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
|
||||
*
|
||||
* DP2 - 2-component Dot Product
|
||||
* dst = src0.x \times src1.x + src0.y \times src1.y
|
||||
*
|
||||
@@ -924,16 +921,14 @@ transform_log(struct tgsi_transform_context *tctx,
|
||||
* operations, which is what you'd prefer for a ISA that is natively
|
||||
* scalar. Probably a native vector ISA would at least already have
|
||||
* DP4/DP3 instructions, but perhaps there is room for an alternative
|
||||
* translation for DPH/DP2 using vector instructions.
|
||||
* translation for DP2 using vector instructions.
|
||||
*
|
||||
* ; needs: 1 tmp
|
||||
* MUL tmpA.x, src0.x, src1.x
|
||||
* MAD tmpA.x, src0.y, src1.y, tmpA.x
|
||||
* if (DPH || DP3 || DP4) {
|
||||
* if (DP3 || DP4) {
|
||||
* MAD tmpA.x, src0.z, src1.z, tmpA.x
|
||||
* if (DPH) {
|
||||
* ADD tmpA.x, src1.w, tmpA.x
|
||||
* } else if (DP4) {
|
||||
* if (DP4) {
|
||||
* MAD tmpA.x, src0.w, src1.w, tmpA.x
|
||||
* }
|
||||
* }
|
||||
@@ -941,7 +936,6 @@ transform_log(struct tgsi_transform_context *tctx,
|
||||
*/
|
||||
#define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
|
||||
#define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
|
||||
#define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
|
||||
#define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
|
||||
#define DOTP_TMP 1
|
||||
static void
|
||||
@@ -980,8 +974,7 @@ transform_dotp(struct tgsi_transform_context *tctx,
|
||||
reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
|
||||
reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
|
||||
|
||||
if ((opcode == TGSI_OPCODE_DPH) ||
|
||||
(opcode == TGSI_OPCODE_DP3) ||
|
||||
if ((opcode == TGSI_OPCODE_DP3) ||
|
||||
(opcode == TGSI_OPCODE_DP4)) {
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
|
||||
@@ -995,18 +988,7 @@ transform_dotp(struct tgsi_transform_context *tctx,
|
||||
reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
|
||||
reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
|
||||
|
||||
if (opcode == TGSI_OPCODE_DPH) {
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
|
||||
/* ADD tmpA.x, src1.w, tmpA.x */
|
||||
new_inst = tgsi_default_full_instruction();
|
||||
new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
|
||||
new_inst.Instruction.NumDstRegs = 1;
|
||||
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
|
||||
new_inst.Instruction.NumSrcRegs = 2;
|
||||
reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W));
|
||||
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
|
||||
} else if (opcode == TGSI_OPCODE_DP4) {
|
||||
if (opcode == TGSI_OPCODE_DP4) {
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
|
||||
/* MAD tmpA.x, src0.w, src1.w, tmpA.x */
|
||||
@@ -1534,11 +1516,6 @@ transform_instr(struct tgsi_transform_context *tctx,
|
||||
goto skip;
|
||||
transform_dotp(tctx, inst);
|
||||
break;
|
||||
case TGSI_OPCODE_DPH:
|
||||
if (!ctx->config->lower_DPH)
|
||||
goto skip;
|
||||
transform_dotp(tctx, inst);
|
||||
break;
|
||||
case TGSI_OPCODE_DP2:
|
||||
if (!ctx->config->lower_DP2)
|
||||
goto skip;
|
||||
@@ -1632,7 +1609,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
|
||||
OPCS(LOG) ||
|
||||
OPCS(DP4) ||
|
||||
OPCS(DP3) ||
|
||||
OPCS(DPH) ||
|
||||
OPCS(DP2) ||
|
||||
OPCS(FLR) ||
|
||||
OPCS(CEIL) ||
|
||||
@@ -1693,10 +1669,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
|
||||
newlen += DP3_GROW * OPCS(DP3);
|
||||
numtmp = MAX2(numtmp, DOTP_TMP);
|
||||
}
|
||||
if (OPCS(DPH)) {
|
||||
newlen += DPH_GROW * OPCS(DPH);
|
||||
numtmp = MAX2(numtmp, DOTP_TMP);
|
||||
}
|
||||
if (OPCS(DP2)) {
|
||||
newlen += DP2_GROW * OPCS(DP2);
|
||||
numtmp = MAX2(numtmp, DOTP_TMP);
|
||||
|
@@ -65,7 +65,6 @@ struct tgsi_lowering_config
|
||||
unsigned lower_LOG:1;
|
||||
unsigned lower_DP4:1;
|
||||
unsigned lower_DP3:1;
|
||||
unsigned lower_DPH:1;
|
||||
unsigned lower_DP2:1;
|
||||
unsigned lower_FLR:1;
|
||||
unsigned lower_CEIL:1;
|
||||
|
@@ -64,7 +64,6 @@ OP11(EX2)
|
||||
OP11(LG2)
|
||||
OP12(POW)
|
||||
OP12(XPD)
|
||||
OP12(DPH)
|
||||
OP11(COS)
|
||||
OP11(DDX)
|
||||
OP11(DDY)
|
||||
|
@@ -274,10 +274,6 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
|
||||
read_mask = TGSI_WRITEMASK_XYZW;
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DPH:
|
||||
read_mask = src_idx == 0 ? TGSI_WRITEMASK_XYZ : TGSI_WRITEMASK_XYZW;
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_TEX:
|
||||
case TGSI_OPCODE_TXD:
|
||||
case TGSI_OPCODE_TXB:
|
||||
|
@@ -363,15 +363,6 @@ This instruction replicates its result.
|
||||
dst.w = 1
|
||||
|
||||
|
||||
.. opcode:: DPH - Homogeneous Dot Product
|
||||
|
||||
This instruction replicates its result.
|
||||
|
||||
.. math::
|
||||
|
||||
dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
|
||||
|
||||
|
||||
.. opcode:: COS - Cosine
|
||||
|
||||
This instruction replicates its result.
|
||||
@@ -3672,7 +3663,7 @@ of the operands are equal to 0. That means that 0 * Inf = 0. This
|
||||
should be set the same way for an entire pipeline. Note that this
|
||||
applies not only to the literal MUL TGSI opcode, but all FP32
|
||||
multiplications implied by other operations, such as MAD, FMA, DP2,
|
||||
DP3, DP4, DPH, DST, LOG, LRP, XPD, and possibly others. If there is a
|
||||
DP3, DP4, DST, LOG, LRP, XPD, and possibly others. If there is a
|
||||
mismatch between shaders, then it is unspecified whether this behavior
|
||||
will be enabled.
|
||||
|
||||
|
@@ -1663,33 +1663,6 @@ trans_lg2(const struct instr_translater *t, struct etna_compile *c,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
trans_dph(const struct instr_translater *t, struct etna_compile *c,
|
||||
const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
|
||||
{
|
||||
/*
|
||||
DP3 tmp.xyzw, src0.xyzw, src1.xyzw, void
|
||||
ADD dst.xyzw, tmp.xyzw, void, src1.wwww
|
||||
*/
|
||||
struct etna_native_reg temp = etna_compile_get_inner_temp(c);
|
||||
struct etna_inst ins[2] = { };
|
||||
|
||||
ins[0].opcode = INST_OPCODE_DP3;
|
||||
ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
|
||||
INST_COMPS_Z | INST_COMPS_W);
|
||||
ins[0].src[0] = src[0];
|
||||
ins[0].src[1] = src[1];
|
||||
|
||||
ins[1].opcode = INST_OPCODE_ADD;
|
||||
ins[1].sat = inst->Instruction.Saturate;
|
||||
ins[1].dst = convert_dst(c, &inst->Dst[0]);
|
||||
ins[1].src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
|
||||
ins[1].src[2] = swizzle(src[1], SWIZZLE(W, W, W, W));
|
||||
|
||||
emit_inst(c, &ins[0]);
|
||||
emit_inst(c, &ins[1]);
|
||||
}
|
||||
|
||||
static void
|
||||
trans_sampler(const struct instr_translater *t, struct etna_compile *c,
|
||||
const struct tgsi_full_instruction *inst,
|
||||
@@ -1833,7 +1806,6 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
|
||||
INSTR(LRP, trans_lrp),
|
||||
INSTR(LIT, trans_lit),
|
||||
INSTR(SSG, trans_ssg),
|
||||
INSTR(DPH, trans_dph),
|
||||
|
||||
INSTR(SIN, trans_trig),
|
||||
INSTR(COS, trans_trig),
|
||||
|
@@ -85,7 +85,6 @@ static const struct {
|
||||
[ TGSI_OPCODE_DP2 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
|
||||
[ TGSI_OPCODE_DP3 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
|
||||
[ TGSI_OPCODE_DP4 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
|
||||
[ TGSI_OPCODE_DPH ] = { false, false, 0, 1, 2 },
|
||||
[ TGSI_OPCODE_DST ] = { false, false, 0, 1, 2 },
|
||||
[ TGSI_OPCODE_END ] = { false, false, 0, 0, 0 },
|
||||
[ TGSI_OPCODE_EX2 ] = { false, false, 0, 1, 1 },
|
||||
|
@@ -604,17 +604,6 @@ i915_translate_instruction(struct i915_fp_compile *p,
|
||||
emit_simple_arith(p, inst, A0_DP4, 2, fs);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DPH:
|
||||
src0 = src_vector(p, &inst->Src[0], fs);
|
||||
src1 = src_vector(p, &inst->Src[1], fs);
|
||||
|
||||
i915_emit_arith(p,
|
||||
A0_DP4,
|
||||
get_result_vector(p, &inst->Dst[0]),
|
||||
get_result_flags(inst), 0,
|
||||
swizzle(src0, X, Y, Z, ONE), src1, 0);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DST:
|
||||
src0 = src_vector(p, &inst->Src[0], fs);
|
||||
src1 = src_vector(p, &inst->Src[1], fs);
|
||||
|
@@ -277,7 +277,6 @@ unsigned int Instruction::srcMask(unsigned int s) const
|
||||
case TGSI_OPCODE_DP3:
|
||||
return 0x7;
|
||||
case TGSI_OPCODE_DP4:
|
||||
case TGSI_OPCODE_DPH:
|
||||
case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */
|
||||
return 0xf;
|
||||
case TGSI_OPCODE_DST:
|
||||
@@ -3321,13 +3320,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
|
||||
mkMov(dst0[c], val0);
|
||||
break;
|
||||
case TGSI_OPCODE_DPH:
|
||||
val0 = buildDot(3);
|
||||
src1 = fetchSrc(1, 3);
|
||||
mkOp2(OP_ADD, TYPE_F32, val0, val0, src1);
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
|
||||
mkMov(dst0[c], val0);
|
||||
break;
|
||||
case TGSI_OPCODE_DST:
|
||||
if (dst0[0])
|
||||
loadImm(dst0[0], 1.0f);
|
||||
|
@@ -591,11 +591,6 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
|
||||
case TGSI_OPCODE_DP4:
|
||||
nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none));
|
||||
break;
|
||||
case TGSI_OPCODE_DPH:
|
||||
tmp = nvfx_src(temp(fpc));
|
||||
nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[1], none));
|
||||
nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none));
|
||||
break;
|
||||
case TGSI_OPCODE_DST:
|
||||
nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none));
|
||||
break;
|
||||
|
@@ -588,9 +588,6 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
|
||||
case TGSI_OPCODE_DP4:
|
||||
nvfx_vp_emit(vpc, arith(sat, VEC, DP4, dst, mask, src[0], src[1], none));
|
||||
break;
|
||||
case TGSI_OPCODE_DPH:
|
||||
nvfx_vp_emit(vpc, arith(sat, VEC, DPH, dst, mask, src[0], src[1], none));
|
||||
break;
|
||||
case TGSI_OPCODE_DST:
|
||||
nvfx_vp_emit(vpc, arith(sat, VEC, DST, dst, mask, src[0], src[1], none));
|
||||
break;
|
||||
|
@@ -58,7 +58,6 @@ static unsigned translate_opcode(unsigned opcode)
|
||||
case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
|
||||
case TGSI_OPCODE_POW: return RC_OPCODE_POW;
|
||||
case TGSI_OPCODE_XPD: return RC_OPCODE_XPD;
|
||||
case TGSI_OPCODE_DPH: return RC_OPCODE_DPH;
|
||||
case TGSI_OPCODE_COS: return RC_OPCODE_COS;
|
||||
case TGSI_OPCODE_DDX: return RC_OPCODE_DDX;
|
||||
case TGSI_OPCODE_DDY: return RC_OPCODE_DDY;
|
||||
|
@@ -6617,13 +6617,6 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
|
||||
alu.src[0].chan = alu.src[1].chan = 0;
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_DPH:
|
||||
if (i == 3) {
|
||||
alu.src[0].sel = V_SQ_ALU_SRC_1;
|
||||
alu.src[0].chan = 0;
|
||||
alu.src[0].neg = 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -9103,7 +9096,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
|
||||
[32] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[33] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[34] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
|
||||
[35] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
|
||||
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
|
||||
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
|
||||
@@ -9301,7 +9294,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
|
||||
[32] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[33] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[34] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
|
||||
[35] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
|
||||
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
|
||||
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
|
||||
@@ -9524,7 +9517,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
|
||||
[32] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[33] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[34] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
|
||||
[35] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig},
|
||||
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
|
||||
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
|
||||
|
@@ -1236,39 +1236,6 @@ emit_dp2(struct svga_shader_emitter *emit,
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Translate the following TGSI DPH instruction.
|
||||
* DPH DST, SRC1, SRC2
|
||||
* To the following SVGA3D instruction sequence.
|
||||
* DP3 TMP, SRC1, SRC2
|
||||
* ADD DST, TMP, SRC2.wwww
|
||||
*/
|
||||
static boolean
|
||||
emit_dph(struct svga_shader_emitter *emit,
|
||||
const struct tgsi_full_instruction *insn )
|
||||
{
|
||||
SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
|
||||
const struct src_register src0 = translate_src_register(
|
||||
emit, &insn->Src[0] );
|
||||
struct src_register src1 =
|
||||
translate_src_register(emit, &insn->Src[1]);
|
||||
SVGA3dShaderDestToken temp = get_temp( emit );
|
||||
|
||||
/* DP3 TMP, SRC1, SRC2 */
|
||||
if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
|
||||
return FALSE;
|
||||
|
||||
src1 = scalar(src1, TGSI_SWIZZLE_W);
|
||||
|
||||
/* ADD DST, TMP, SRC2.wwww */
|
||||
if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
|
||||
src( temp ), src1 ))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sine / Cosine helper function.
|
||||
*/
|
||||
@@ -2924,9 +2891,6 @@ svga_emit_instruction(struct svga_shader_emitter *emit,
|
||||
case TGSI_OPCODE_DP2:
|
||||
return emit_dp2( emit, insn );
|
||||
|
||||
case TGSI_OPCODE_DPH:
|
||||
return emit_dph( emit, insn );
|
||||
|
||||
case TGSI_OPCODE_COS:
|
||||
return emit_cos( emit, insn );
|
||||
|
||||
|
@@ -3577,40 +3577,6 @@ emit_cmp(struct svga_shader_emitter_v10 *emit,
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit code for TGSI_OPCODE_DPH instruction.
|
||||
*/
|
||||
static boolean
|
||||
emit_dph(struct svga_shader_emitter_v10 *emit,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
/*
|
||||
* DP3 tmp, s0, s1
|
||||
* ADD dst, tmp, s1.wwww
|
||||
*/
|
||||
|
||||
struct tgsi_full_src_register s1_wwww =
|
||||
swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
|
||||
TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
|
||||
|
||||
unsigned tmp = get_temp_index(emit);
|
||||
struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
|
||||
struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
|
||||
|
||||
/* DP3 tmp, s0, s1 */
|
||||
emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0],
|
||||
&inst->Src[1], FALSE);
|
||||
|
||||
/* ADD dst, tmp, s1.wwww */
|
||||
emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src,
|
||||
&s1_wwww, inst->Instruction.Saturate);
|
||||
|
||||
free_temp_indexes(emit);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit code for TGSI_OPCODE_DST instruction.
|
||||
*/
|
||||
@@ -5712,8 +5678,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
|
||||
return emit_cmp(emit, inst);
|
||||
case TGSI_OPCODE_COS:
|
||||
return emit_sincos(emit, inst);
|
||||
case TGSI_OPCODE_DPH:
|
||||
return emit_dph(emit, inst);
|
||||
case TGSI_OPCODE_DST:
|
||||
return emit_dst(emit, inst);
|
||||
case TGSI_OPCODE_EX2:
|
||||
|
@@ -372,7 +372,7 @@ struct tgsi_property_data {
|
||||
#define TGSI_OPCODE_U2I64 32
|
||||
#define TGSI_OPCODE_CLOCK 33
|
||||
#define TGSI_OPCODE_I2I64 34
|
||||
#define TGSI_OPCODE_DPH 35
|
||||
/* gap */
|
||||
#define TGSI_OPCODE_COS 36
|
||||
#define TGSI_OPCODE_DDX 37
|
||||
#define TGSI_OPCODE_DDY 38
|
||||
|
@@ -457,8 +457,6 @@ translate_opcode( unsigned op )
|
||||
return TGSI_OPCODE_DP3;
|
||||
case OPCODE_DP4:
|
||||
return TGSI_OPCODE_DP4;
|
||||
case OPCODE_DPH:
|
||||
return TGSI_OPCODE_DPH;
|
||||
case OPCODE_DST:
|
||||
return TGSI_OPCODE_DST;
|
||||
case OPCODE_EX2:
|
||||
@@ -589,6 +587,17 @@ compile_instruction(
|
||||
ureg_ADD(ureg, dst[0], src[0], ureg_negate(src[1]));
|
||||
break;
|
||||
|
||||
case OPCODE_DPH: {
|
||||
struct ureg_dst temp = ureg_DECL_temporary(ureg);
|
||||
|
||||
/* DPH = DP4(src0, src1) where src0.w = 1. */
|
||||
ureg_MOV(ureg, ureg_writemask(temp, TGSI_WRITEMASK_XYZ), src[0]);
|
||||
ureg_MOV(ureg, ureg_writemask(temp, TGSI_WRITEMASK_W),
|
||||
ureg_imm1f(ureg, 1));
|
||||
ureg_DP4(ureg, dst[0], ureg_src(temp), src[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
ureg_insn( ureg,
|
||||
translate_opcode( inst->Opcode ),
|
||||
|
Reference in New Issue
Block a user