i965: Add support for the DP2 opcode, which we use for dot(vec2, vec2).

The original GLSL compiler would generate a.x * b.x + a.y * b.y, for which
we would emit mul+mul+add instead of this mul+mac.

Fixes glsl-fs-dot-vec2.
This commit is contained in:
Eric Anholt
2010-07-02 16:17:50 -07:00
parent 8f25d198e5
commit 4e7d5d0e74
4 changed files with 38 additions and 0 deletions

View File

@@ -343,6 +343,11 @@ void emit_delta_xy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
const struct brw_reg *arg0);
/**
 * Emit a two-component dot product (DP2) of arg0 and arg1.
 *
 * Only the first two channels of each argument are read.  The result is
 * written to the single destination channel selected by the XYZW bits of
 * \p mask; nothing is emitted when none of those bits is set.  The
 * SATURATE bit of \p mask requests saturation of the final result.
 */
void emit_dp2(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
const struct brw_reg *arg0,
const struct brw_reg *arg1);
void emit_dp3(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,

View File

@@ -731,6 +731,27 @@ void emit_min(struct brw_compile *p,
}
/**
 * Emit code for DP2: dst = arg0.x * arg1.x + arg0.y * arg1.y.
 *
 * \param p     compile context the instructions are appended to
 * \param dst   per-channel destination registers
 * \param mask  destination flags: the XYZW bits pick the (single) channel
 *              to write, SATURATE requests saturation of the result
 * \param arg0  per-channel registers of the first operand (x and y read)
 * \param arg1  per-channel registers of the second operand (x and y read)
 */
void emit_dp2(struct brw_compile *p,
              const struct brw_reg *dst,
              GLuint mask,
              const struct brw_reg *arg0,
              const struct brw_reg *arg1)
{
   const GLuint writemask = mask & WRITEMASK_XYZW;
   int chan;

   /* No channel is written, so there is nothing worth emitting. */
   if (writemask == 0)
      return;

   /* The scalar result goes to one channel only. */
   assert(is_power_of_two(writemask));
   chan = _mesa_ffs(writemask) - 1;

   /* x product is sent to the null register; presumably the MAC below
    * picks it up from the accumulator -- TODO confirm against the PRM.
    */
   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);

   /* Saturation is applied only to the final MAC, then switched off again. */
   brw_set_saturate(p, (mask & SATURATE) != 0);
   brw_MAC(p, dst[chan], arg0[1], arg1[1]);
   brw_set_saturate(p, 0);
}
void emit_dp3(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
@@ -1584,6 +1605,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
break;
case OPCODE_DP2:
emit_dp2(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_DP3:
emit_dp3(p, dst, dst_flags, args[0], args[1]);
break;

View File

@@ -1903,6 +1903,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_SWZ:
emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
break;
case OPCODE_DP2:
emit_dp2(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_DP3:
emit_dp3(p, dst, dst_flags, args[0], args[1]);
break;

View File

@@ -255,6 +255,11 @@ void brw_wm_pass1( struct brw_wm_compile *c )
read2 = WRITEMASK_W; /* pixel w */
break;
case OPCODE_DP2:
read0 = WRITEMASK_XY;
read1 = WRITEMASK_XY;
break;
case OPCODE_DP3:
read0 = WRITEMASK_XYZ;
read1 = WRITEMASK_XYZ;