i965: Add support for the DP2 opcode, which we use for dot(vec2, vec2).
The original GLSL compiler would generate a.x * b.x + a.y * b.y, for which we emitted mul+mul+add rather than this mul+mac sequence. Fixes glsl-fs-dot-vec2.
This commit is contained in:
@@ -343,6 +343,11 @@ void emit_delta_xy(struct brw_compile *p,
|
||||
const struct brw_reg *dst,
|
||||
GLuint mask,
|
||||
const struct brw_reg *arg0);
|
||||
/* Emit a two-component dot product of arg0 and arg1 (used for
 * dot(vec2, vec2)) into the single dst channel selected by mask.
 */
void emit_dp2(struct brw_compile *p,
|
||||
const struct brw_reg *dst,
|
||||
GLuint mask,
|
||||
const struct brw_reg *arg0,
|
||||
const struct brw_reg *arg1);
|
||||
void emit_dp3(struct brw_compile *p,
|
||||
const struct brw_reg *dst,
|
||||
GLuint mask,
|
||||
|
@@ -731,6 +731,27 @@ void emit_min(struct brw_compile *p,
|
||||
}
|
||||
|
||||
|
||||
void emit_dp2(struct brw_compile *p,
|
||||
const struct brw_reg *dst,
|
||||
GLuint mask,
|
||||
const struct brw_reg *arg0,
|
||||
const struct brw_reg *arg1)
|
||||
{
|
||||
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
|
||||
|
||||
if (!(mask & WRITEMASK_XYZW))
|
||||
return; /* Do not emit dead code */
|
||||
|
||||
assert(is_power_of_two(mask & WRITEMASK_XYZW));
|
||||
|
||||
brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
|
||||
|
||||
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
|
||||
brw_MAC(p, dst[dst_chan], arg0[1], arg1[1]);
|
||||
brw_set_saturate(p, 0);
|
||||
}
|
||||
|
||||
|
||||
void emit_dp3(struct brw_compile *p,
|
||||
const struct brw_reg *dst,
|
||||
GLuint mask,
|
||||
@@ -1584,6 +1605,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
|
||||
emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_DP2:
|
||||
emit_dp2(p, dst, dst_flags, args[0], args[1]);
|
||||
break;
|
||||
|
||||
case OPCODE_DP3:
|
||||
emit_dp3(p, dst, dst_flags, args[0], args[1]);
|
||||
break;
|
||||
|
@@ -1903,6 +1903,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
|
||||
case OPCODE_SWZ:
|
||||
emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
|
||||
break;
|
||||
case OPCODE_DP2:
|
||||
emit_dp2(p, dst, dst_flags, args[0], args[1]);
|
||||
break;
|
||||
case OPCODE_DP3:
|
||||
emit_dp3(p, dst, dst_flags, args[0], args[1]);
|
||||
break;
|
||||
|
@@ -255,6 +255,11 @@ void brw_wm_pass1( struct brw_wm_compile *c )
|
||||
read2 = WRITEMASK_W; /* pixel w */
|
||||
break;
|
||||
|
||||
case OPCODE_DP2:
|
||||
read0 = WRITEMASK_XY;
|
||||
read1 = WRITEMASK_XY;
|
||||
break;
|
||||
|
||||
case OPCODE_DP3:
|
||||
read0 = WRITEMASK_XYZ;
|
||||
read1 = WRITEMASK_XYZ;
|
||||
|
Reference in New Issue
Block a user