r300: implement bias presubtract
RV530 shader-db: total instructions in shared programs: 129468 -> 128859 (-0.47%) instructions in affected programs: 34432 -> 33823 (-1.77%) helped: 362 HURT: 56 total presub in shared programs: 5411 -> 7635 (41.10%) presub in affected programs: 2069 -> 4293 (107.49%) helped: 8 HURT: 468 total temps in shared programs: 16918 -> 16944 (0.15%) temps in affected programs: 2022 -> 2048 (1.29%) helped: 73 HURT: 79 total lits in shared programs: 3555 -> 2913 (-18.06%) lits in affected programs: 2346 -> 1704 (-27.37%) helped: 479 HURT: 0 total cycles in shared programs: 194675 -> 194124 (-0.28%) cycles in affected programs: 62939 -> 62388 (-0.88%) helped: 343 HURT: 84 Also dEQP-GLES2.functional.shaders.random.trigonometric.fragment.15 now fits into the instruction limit on RV370. Reviewed-by: Filip Gawin <filip.gawin@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24830>
This commit is contained in:

committed by
Marge Bot

parent
3b74360338
commit
0508db9155
@@ -53,7 +53,6 @@ dEQP-GLES2.functional.shaders.random.all_features.fragment.5,Fail
|
|||||||
dEQP-GLES2.functional.shaders.random.all_features.fragment.6,Fail
|
dEQP-GLES2.functional.shaders.random.all_features.fragment.6,Fail
|
||||||
dEQP-GLES2.functional.shaders.random.all_features.fragment.93,Fail
|
dEQP-GLES2.functional.shaders.random.all_features.fragment.93,Fail
|
||||||
dEQP-GLES2.functional.shaders.random.all_features.fragment.97,Fail
|
dEQP-GLES2.functional.shaders.random.all_features.fragment.97,Fail
|
||||||
dEQP-GLES2.functional.shaders.random.trigonometric.fragment.15,Fail
|
|
||||||
dEQP-GLES2.functional.shaders.random.trigonometric.fragment.45,Fail
|
dEQP-GLES2.functional.shaders.random.trigonometric.fragment.45,Fail
|
||||||
dEQP-GLES2.functional.texture.filtering.cube.linear_linear_clamp_l8_npot,Fail
|
dEQP-GLES2.functional.texture.filtering.cube.linear_linear_clamp_l8_npot,Fail
|
||||||
dEQP-GLES2.functional.texture.filtering.cube.linear_linear_clamp_rgb888_npot,Fail
|
dEQP-GLES2.functional.texture.filtering.cube.linear_linear_clamp_rgb888_npot,Fail
|
||||||
|
@@ -47,7 +47,6 @@ dEQP-GLES2.functional.shaders.random.all_features.fragment.5,Fail
|
|||||||
dEQP-GLES2.functional.shaders.random.all_features.fragment.6,Fail
|
dEQP-GLES2.functional.shaders.random.all_features.fragment.6,Fail
|
||||||
dEQP-GLES2.functional.shaders.random.all_features.fragment.93,Fail
|
dEQP-GLES2.functional.shaders.random.all_features.fragment.93,Fail
|
||||||
dEQP-GLES2.functional.shaders.random.all_features.fragment.97,Fail
|
dEQP-GLES2.functional.shaders.random.all_features.fragment.97,Fail
|
||||||
dEQP-GLES2.functional.shaders.random.trigonometric.fragment.15,Fail
|
|
||||||
dEQP-GLES2.functional.shaders.random.trigonometric.fragment.45,Fail
|
dEQP-GLES2.functional.shaders.random.trigonometric.fragment.45,Fail
|
||||||
dEQP-GLES2.functional.texture.filtering.cube.linear_linear_clamp_l8_npot,Fail
|
dEQP-GLES2.functional.texture.filtering.cube.linear_linear_clamp_l8_npot,Fail
|
||||||
dEQP-GLES2.functional.texture.filtering.cube.linear_linear_clamp_rgb888_npot,Fail
|
dEQP-GLES2.functional.texture.filtering.cube.linear_linear_clamp_rgb888_npot,Fail
|
||||||
|
@@ -70,6 +70,12 @@ r300_nir_prepare_presubtract = [
|
|||||||
(('fadd', ('fneg', a), 1.0), ('fadd', 1.0, ('fneg', a))),
|
(('fadd', ('fneg', a), 1.0), ('fadd', 1.0, ('fneg', a))),
|
||||||
(('fadd', a, -1.0), ('fneg', ('fadd', 1.0, ('fneg', a)))),
|
(('fadd', a, -1.0), ('fneg', ('fadd', 1.0, ('fneg', a)))),
|
||||||
(('fadd', -1.0, a), ('fneg', ('fadd', 1.0, ('fneg', a)))),
|
(('fadd', -1.0, a), ('fneg', ('fadd', 1.0, ('fneg', a)))),
|
||||||
|
# Bias presubtract 1 - 2 * x expects MAD -a 2.0 1.0 form.
|
||||||
|
(('ffma', 2.0, ('fneg', a), 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
|
||||||
|
(('ffma', a, -2.0, 1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
|
||||||
|
(('ffma', -2.0, a, 1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
|
||||||
|
(('ffma', 2.0, a, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
|
||||||
|
(('ffma', a, 2.0, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Previous prepare_presubtract pass can sometimes produce double fneg patterns.
|
# Previous prepare_presubtract pass can sometimes produce double fneg patterns.
|
||||||
|
@@ -481,7 +481,7 @@ static int is_presub_candidate(
|
|||||||
unsigned int i;
|
unsigned int i;
|
||||||
unsigned int is_constant[2] = {0, 0};
|
unsigned int is_constant[2] = {0, 0};
|
||||||
|
|
||||||
assert(inst->U.I.Opcode == RC_OPCODE_ADD);
|
assert(inst->U.I.Opcode == RC_OPCODE_ADD || inst->U.I.Opcode == RC_OPCODE_MAD);
|
||||||
|
|
||||||
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
|
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
|
||||||
|| inst->U.I.SaturateMode
|
|| inst->U.I.SaturateMode
|
||||||
@@ -490,7 +490,7 @@ static int is_presub_candidate(
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If both sources use a constant swizzle, then we can't convert it to
|
/* If the first two sources both use a constant swizzle, then we can't convert it to
|
||||||
* a presubtract operation. In fact for the ADD and SUB presubtract
|
* a presubtract operation. In fact for the ADD and SUB presubtract
|
||||||
* operations neither source can contain a constant swizzle. This
|
* operations neither source can contain a constant swizzle. This
|
||||||
* specific case is checked in peephole_add_presub_add() when
|
* specific case is checked in peephole_add_presub_add() when
|
||||||
@@ -573,6 +573,23 @@ static void presub_replace_inv(
|
|||||||
inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
|
inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void presub_replace_bias(
|
||||||
|
struct rc_instruction * inst_mad,
|
||||||
|
struct rc_instruction * inst_reader,
|
||||||
|
unsigned int src_index)
|
||||||
|
{
|
||||||
|
/* We must be careful not to modify inst_mad, since it
|
||||||
|
* is possible it will remain part of the program.*/
|
||||||
|
inst_reader->U.I.PreSub.SrcReg[0] = inst_mad->U.I.SrcReg[0];
|
||||||
|
inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
|
||||||
|
inst_reader->U.I.PreSub.Opcode = RC_PRESUB_BIAS;
|
||||||
|
inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
|
||||||
|
inst_reader->U.I.PreSub.SrcReg[0]);
|
||||||
|
|
||||||
|
inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
|
||||||
|
inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_BIAS;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
|
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
|
||||||
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
|
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
|
||||||
@@ -622,6 +639,66 @@ static int peephole_add_presub_inv(
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PRESUB_BIAD: MAD -TEMP[0], 2.0, 1.0
|
||||||
|
* Use the presubtract 1 - 2*src0 for all readers of TEMP[0]. The first source
|
||||||
|
* of the add instruction must have the constant 1 swizzle. This function
|
||||||
|
* does not check const registers to see if their value is 1.0, so it should
|
||||||
|
* be called after the constant_folding optimization.
|
||||||
|
* @return
|
||||||
|
* 0 if the MAD instruction is still part of the program.
|
||||||
|
* 1 if the MAD instruction is no longer part of the program.
|
||||||
|
*/
|
||||||
|
static int peephole_mad_presub_bias(
|
||||||
|
struct radeon_compiler * c,
|
||||||
|
struct rc_instruction * inst_mad)
|
||||||
|
{
|
||||||
|
unsigned int i, swz;
|
||||||
|
|
||||||
|
if (!is_presub_candidate(c, inst_mad))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Check if src2 is 1. */
|
||||||
|
for(i = 0; i < 4; i++ ) {
|
||||||
|
if (!(inst_mad->U.I.DstReg.WriteMask & (1 << i)))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
swz = GET_SWZ(inst_mad->U.I.SrcReg[2].Swizzle, i);
|
||||||
|
if (swz != RC_SWIZZLE_ONE || inst_mad->U.I.SrcReg[2].Negate & (1 << i))
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if src1 is 2. */
|
||||||
|
struct rc_src_register src1_reg = inst_mad->U.I.SrcReg[1];
|
||||||
|
if ((src1_reg.Negate & inst_mad->U.I.DstReg.WriteMask) != 0 || src1_reg.Abs)
|
||||||
|
return 0;
|
||||||
|
struct rc_constant *constant = &c->Program.Constants.Constants[src1_reg.Index];
|
||||||
|
if (constant->Type != RC_CONSTANT_IMMEDIATE)
|
||||||
|
return 0;
|
||||||
|
for (i = 0; i < 4; i++) {
|
||||||
|
if (!(inst_mad->U.I.DstReg.WriteMask & (1 << i)))
|
||||||
|
continue;
|
||||||
|
swz = GET_SWZ(src1_reg.Swizzle, i);
|
||||||
|
if (swz >= RC_SWIZZLE_ZERO || constant->u.Immediate[swz] != 2.0)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check src0. */
|
||||||
|
if ((inst_mad->U.I.SrcReg[0].Negate & inst_mad->U.I.DstReg.WriteMask) !=
|
||||||
|
inst_mad->U.I.DstReg.WriteMask
|
||||||
|
|| inst_mad->U.I.SrcReg[0].Abs
|
||||||
|
|| src_has_const_swz(inst_mad->U.I.SrcReg[0])) {
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (presub_helper(c, inst_mad, RC_PRESUB_BIAS, presub_replace_bias)) {
|
||||||
|
rc_remove_instruction(inst_mad);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
struct peephole_mul_cb_data {
|
struct peephole_mul_cb_data {
|
||||||
struct rc_dst_register * Writer;
|
struct rc_dst_register * Writer;
|
||||||
unsigned int Clobbered;
|
unsigned int Clobbered;
|
||||||
@@ -821,6 +898,12 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
|
|||||||
return 1;
|
return 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case RC_OPCODE_MAD:
|
||||||
|
{
|
||||||
|
if (peephole_mad_presub_bias(c, inst))
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user