i915: Optimize SEQ and SNE when two operands are uniforms
SEQ and SNE are not native i915 instructions, so they each generate at
least 3 instructions. If both operands are uniforms or constants, we get
5 instructions like:

    U[1] = MOV CONST[1]
    U[0].xyz = SGE CONST[0].xxxx, U[1]
    U[1] = MOV CONST[1].-x-y-z-w
    R[0].xyz = SGE CONST[0].-x-x-x-x, U[1]
    R[0].xyz = MUL R[0], U[0]

This code is stupid. Instead of having the individual calls to
i915_emit_arith generate the moves to utemps, do it in the caller. This
results in code like:

    U[1] = MOV CONST[1]
    U[0].xyz = SGE CONST[0].xxxx, U[1]
    R[0].xyz = SGE CONST[0].-x-x-x-x, U[1].-x-y-z-w
    R[0].xyz = MUL R[0], U[0]

This allows fs-temp-array-mat2-index-col-wr and
fs-temp-array-mat2-index-row-wr to fit in hardware limits (instead of
falling back to software rasterization).

NOTE: Without pending patches to the piglit tests, these tests will now
fail. This is an unrelated, pre-existing issue.

v2: Copy most of the body of the commit message into comments in the
code. Suggested by Eric.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
@@ -817,23 +817,52 @@ upload_program(struct i915_fragment_program *p)
|
||||
flags = get_result_flags(inst);
|
||||
dst = get_result_vector(p, inst);
|
||||
|
||||
/* If both operands are uniforms or constants, we get 5 instructions
|
||||
* like:
|
||||
*
|
||||
* U[1] = MOV CONST[1]
|
||||
* U[0].xyz = SGE CONST[0].xxxx, U[1]
|
||||
* U[1] = MOV CONST[1].-x-y-z-w
|
||||
* R[0].xyz = SGE CONST[0].-x-x-x-x, U[1]
|
||||
* R[0].xyz = MUL R[0], U[0]
|
||||
*
|
||||
* This code is stupid. Instead of having the individual calls to
|
||||
* i915_emit_arith generate the moves to utemps, do it in the caller.
|
||||
* This results in code like:
|
||||
*
|
||||
* U[1] = MOV CONST[1]
|
||||
* U[0].xyz = SGE CONST[0].xxxx, U[1]
|
||||
* R[0].xyz = SGE CONST[0].-x-x-x-x, U[1].-x-y-z-w
|
||||
* R[0].xyz = MUL R[0], U[0]
|
||||
*/
|
||||
src0 = src_vector(p, &inst->SrcReg[0], program);
|
||||
src1 = src_vector(p, &inst->SrcReg[1], program);
|
||||
|
||||
if (GET_UREG_TYPE(src0) == REG_TYPE_CONST
|
||||
&& GET_UREG_TYPE(src1) == REG_TYPE_CONST) {
|
||||
unsigned tmp = i915_get_utemp(p);
|
||||
|
||||
i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
|
||||
src1, 0, 0);
|
||||
|
||||
src1 = tmp;
|
||||
}
|
||||
|
||||
/* tmp = src1 >= src2 */
|
||||
i915_emit_arith(p,
|
||||
A0_SGE,
|
||||
tmp,
|
||||
flags, 0,
|
||||
src_vector(p, &inst->SrcReg[0], program),
|
||||
src_vector(p, &inst->SrcReg[1], program),
|
||||
src0,
|
||||
src1,
|
||||
0);
|
||||
/* dst = src1 <= src2 */
|
||||
i915_emit_arith(p,
|
||||
A0_SGE,
|
||||
dst,
|
||||
flags, 0,
|
||||
negate(src_vector(p, &inst->SrcReg[0], program),
|
||||
1, 1, 1, 1),
|
||||
negate(src_vector(p, &inst->SrcReg[1], program),
|
||||
1, 1, 1, 1),
|
||||
negate(src0, 1, 1, 1, 1),
|
||||
negate(src1, 1, 1, 1, 1),
|
||||
0);
|
||||
/* dst = tmp && dst */
|
||||
i915_emit_arith(p,
|
||||
@@ -966,23 +995,52 @@ upload_program(struct i915_fragment_program *p)
|
||||
flags = get_result_flags(inst);
|
||||
dst = get_result_vector(p, inst);
|
||||
|
||||
/* If both operands are uniforms or constants, we get 5 instructions
|
||||
* like:
|
||||
*
|
||||
* U[1] = MOV CONST[1]
|
||||
* U[0].xyz = SLT CONST[0].xxxx, U[1]
|
||||
* U[1] = MOV CONST[1].-x-y-z-w
|
||||
* R[0].xyz = SLT CONST[0].-x-x-x-x, U[1]
|
||||
* R[0].xyz = MUL R[0], U[0]
|
||||
*
|
||||
* This code is stupid. Instead of having the individual calls to
|
||||
* i915_emit_arith generate the moves to utemps, do it in the caller.
|
||||
* This results in code like:
|
||||
*
|
||||
* U[1] = MOV CONST[1]
|
||||
* U[0].xyz = SLT CONST[0].xxxx, U[1]
|
||||
* R[0].xyz = SLT CONST[0].-x-x-x-x, U[1].-x-y-z-w
|
||||
* R[0].xyz = MUL R[0], U[0]
|
||||
*/
|
||||
src0 = src_vector(p, &inst->SrcReg[0], program);
|
||||
src1 = src_vector(p, &inst->SrcReg[1], program);
|
||||
|
||||
if (GET_UREG_TYPE(src0) == REG_TYPE_CONST
|
||||
&& GET_UREG_TYPE(src1) == REG_TYPE_CONST) {
|
||||
unsigned tmp = i915_get_utemp(p);
|
||||
|
||||
i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
|
||||
src1, 0, 0);
|
||||
|
||||
src1 = tmp;
|
||||
}
|
||||
|
||||
/* tmp = src1 < src2 */
|
||||
i915_emit_arith(p,
|
||||
A0_SLT,
|
||||
tmp,
|
||||
flags, 0,
|
||||
src_vector(p, &inst->SrcReg[0], program),
|
||||
src_vector(p, &inst->SrcReg[1], program),
|
||||
src0,
|
||||
src1,
|
||||
0);
|
||||
/* dst = src1 > src2 */
|
||||
i915_emit_arith(p,
|
||||
A0_SLT,
|
||||
dst,
|
||||
flags, 0,
|
||||
negate(src_vector(p, &inst->SrcReg[0], program),
|
||||
1, 1, 1, 1),
|
||||
negate(src_vector(p, &inst->SrcReg[1], program),
|
||||
1, 1, 1, 1),
|
||||
negate(src0, 1, 1, 1, 1),
|
||||
negate(src1, 1, 1, 1, 1),
|
||||
0);
|
||||
/* dst = tmp || dst */
|
||||
i915_emit_arith(p,
|
||||
|
Reference in New Issue
Block a user