nir/loop_analyze: Determine iteration counts for more kinds of loops
If the loop iterator is incremented with something other than regular addition, it would be more error-prone to calculate the number of iterations theoretically. What we can do instead is try to emulate the loop, and determine the number of iterations empirically. These operations are covered: - imul - fmul - ishl - ishr - ushr Also add unit tests for loop unrolling. Improves performance of Aztec Ruins (sixonix gfxbench5.aztec_ruins_vk_high) by -1.28042% +/- 0.498555% (N=5) on Intel Arc A770. v2 (idr): Rebase on 3 years. :( Use nir_phi_instr_add_src in the test cases. v3 (idr): Use try_eval_const_alu to evaluate the loop termination condition in get_iteration_empirical. Also restructure the loop slightly. This fixed off-by-one iteration errors in "inverted" loop tests (e.g., nir_loop_analyze_test.ushr_ieq_known_count_invert_31). v4 (idr): Use try_eval_const_alu to evaluate the induction variable update in get_iteration_empirical. This fixes non-commutative update operations (e.g., shifts) when the induction variable is not the first source. This fixes the unit test nir_loop_analyze_test.ishl_rev_ieq_infinite_loop_unknown_count. v5 (idr): Fix _type parameter for fadd and fadd_rev loop unroll tests. Hopefully that fixes the failure on s390x. Temporarily disable fmul. This works around the revealed problem in glsl-fs-loop-unroll-mul-fp64, and there were no shader-db or fossil-db changes. v6 (idr): Plumb max_unroll_iterations into get_iteration_empirical. I was going to do this, but I forgot. Suggested by Tim. v7 (idr): Disable fadd tests on s390x. They fail because S390 is weird. Almost all of the shaders affected (OpenGL or Vulkan) are from gfxbench or geekbench. A couple of shaders in Deus Ex (OpenGL), Dirt Rally (OpenGL), Octopath Traveler (Vulkan), and Rise of the Tomb Raider (Vulkan) are helped. The lost / gained shaders in OpenGL are an Aztec Ruins shader that goes from SIMD16 to SIMD8. The spills / fills affected are in a single Aztec Ruins (Vulkan) compute shader. 
shader-db results: Skylake, Ice Lake, and Tiger Lake had similar results. (Tiger Lake shown) total loops in shared programs: 5514 -> 5470 (-0.80%) loops in affected programs: 62 -> 18 (-70.97%) helped: 37 / HURT: 0 LOST: 2 GAINED: 2 Haswell and Broadwell had similar results. (Broadwell shown) total loops in shared programs: 5346 -> 5298 (-0.90%) loops in affected programs: 66 -> 18 (-72.73%) helped: 39 / HURT: 0 fossil-db results: Skylake, Ice Lake, and Tiger Lake had similar results. (Tiger Lake shown) Instructions in all programs: 157374679 -> 157397421 (+0.0%) Instructions hurt: 28 SENDs in all programs: 7463800 -> 7467639 (+0.1%) SENDs hurt: 28 Loops in all programs: 38980 -> 38950 (-0.1%) Loops helped: 28 Cycles in all programs: 7559486451 -> 7557455384 (-0.0%) Cycles helped: 28 Spills in all programs: 11405 -> 11403 (-0.0%) Spills helped: 1 Fills in all programs: 19578 -> 19588 (+0.1%) Fills hurt: 1 Lost: 1 Signed-off-by: Yevhenii Kolesnikov <yevhenii.kolesnikov@globallogic.com> Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3445>
This commit is contained in:

committed by
Marge Bot

parent
f051967f19
commit
9427aaeab7
@@ -411,6 +411,7 @@ if with_tests
|
||||
'tests/control_flow_tests.cpp',
|
||||
'tests/core_tests.cpp',
|
||||
'tests/loop_analyze_tests.cpp',
|
||||
'tests/loop_unroll_tests.cpp',
|
||||
'tests/lower_alu_width_tests.cpp',
|
||||
'tests/lower_returns_tests.cpp',
|
||||
'tests/mod_analysis_tests.cpp',
|
||||
|
@@ -907,6 +907,43 @@ get_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step,
|
||||
return iter_u64 > INT_MAX ? -1 : (int)iter_u64;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
get_iteration_empirical(nir_alu_instr *cond_alu, nir_alu_instr *incr_alu,
|
||||
nir_ssa_def *basis, nir_const_value initial,
|
||||
bool invert_cond, unsigned execution_mode,
|
||||
unsigned max_unroll_iterations)
|
||||
{
|
||||
int iter_count = 0;
|
||||
nir_const_value result;
|
||||
nir_const_value iter = initial;
|
||||
|
||||
const nir_ssa_def *originals[2] = { basis, NULL };
|
||||
const nir_const_value *replacements[2] = { &iter, NULL };
|
||||
|
||||
while (iter_count <= max_unroll_iterations) {
|
||||
bool success;
|
||||
|
||||
success = try_eval_const_alu(&result, cond_alu, originals, replacements,
|
||||
1, execution_mode);
|
||||
if (!success)
|
||||
return -1;
|
||||
|
||||
const bool cond_succ = invert_cond ? !result.b : result.b;
|
||||
if (cond_succ)
|
||||
return iter_count;
|
||||
|
||||
iter_count++;
|
||||
|
||||
success = try_eval_const_alu(&result, incr_alu, originals, replacements,
|
||||
1, execution_mode);
|
||||
assert(success);
|
||||
|
||||
iter = result;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static bool
|
||||
will_break_on_first_iteration(nir_alu_instr *cond_alu, nir_ssa_def *basis,
|
||||
nir_ssa_def *limit_basis,
|
||||
@@ -980,7 +1017,8 @@ calculate_iterations(nir_ssa_def *basis, nir_ssa_def *limit_basis,
|
||||
nir_const_value initial, nir_const_value step,
|
||||
nir_const_value limit, nir_alu_instr *alu,
|
||||
nir_ssa_scalar cond, nir_op alu_op, bool limit_rhs,
|
||||
bool invert_cond, unsigned execution_mode)
|
||||
bool invert_cond, unsigned execution_mode,
|
||||
unsigned max_unroll_iterations)
|
||||
{
|
||||
/* nir_op_isub should have been lowered away by this point */
|
||||
assert(alu->op != nir_op_isub);
|
||||
@@ -1027,6 +1065,12 @@ calculate_iterations(nir_ssa_def *basis, nir_ssa_def *limit_basis,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* For loops incremented with addition operation, it's easy to
|
||||
* calculate the number of iterations theoretically. Even though it
|
||||
* is possible for other operations as well, it is much more error
|
||||
* prone, and doesn't cover all possible cases. So, we try to
|
||||
* emulate the loop.
|
||||
*/
|
||||
int iter_int;
|
||||
switch (alu->op) {
|
||||
case nir_op_iadd:
|
||||
@@ -1037,12 +1081,20 @@ calculate_iterations(nir_ssa_def *basis, nir_ssa_def *limit_basis,
|
||||
iter_int = get_iteration(alu_op, initial, step, limit, bit_size,
|
||||
execution_mode);
|
||||
break;
|
||||
case nir_op_imul:
|
||||
case nir_op_fmul:
|
||||
/* Detecting non-zero loop counts when the loop increment is floating
|
||||
* point multiplication triggers a preexisting problem in
|
||||
* glsl-fs-loop-unroll-mul-fp64.shader_test. See
|
||||
* https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3445#note_1779438.
|
||||
*/
|
||||
return -1;
|
||||
case nir_op_imul:
|
||||
case nir_op_ishl:
|
||||
case nir_op_ishr:
|
||||
case nir_op_ushr:
|
||||
return -1;
|
||||
return get_iteration_empirical(cond_alu, alu, basis, initial,
|
||||
invert_cond, execution_mode,
|
||||
max_unroll_iterations);
|
||||
default:
|
||||
unreachable("Invalid induction variable increment operation.");
|
||||
}
|
||||
@@ -1206,7 +1258,8 @@ try_find_trip_count_vars_in_iand(nir_ssa_scalar *cond,
|
||||
* loop.
|
||||
*/
|
||||
static void
|
||||
find_trip_count(loop_info_state *state, unsigned execution_mode)
|
||||
find_trip_count(loop_info_state *state, unsigned execution_mode,
|
||||
unsigned max_unroll_iterations)
|
||||
{
|
||||
bool trip_count_known = true;
|
||||
bool guessed_trip_count = false;
|
||||
@@ -1329,7 +1382,8 @@ find_trip_count(loop_info_state *state, unsigned execution_mode)
|
||||
cond,
|
||||
alu_op, limit_rhs,
|
||||
invert_cond,
|
||||
execution_mode);
|
||||
execution_mode,
|
||||
max_unroll_iterations);
|
||||
|
||||
/* Were we not able to calculate the iteration count? */
|
||||
if (iterations == -1) {
|
||||
@@ -1488,7 +1542,9 @@ get_loop_info(loop_info_state *state, nir_function_impl *impl)
|
||||
return;
|
||||
|
||||
/* Run through each of the terminators and try to compute a trip-count */
|
||||
find_trip_count(state, impl->function->shader->info.float_controls_execution_mode);
|
||||
find_trip_count(state,
|
||||
impl->function->shader->info.float_controls_execution_mode,
|
||||
impl->function->shader->options->max_unroll_iterations);
|
||||
|
||||
nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
|
@@ -36,7 +36,10 @@ nir_loop_analyze_test::nir_loop_analyze_test()
|
||||
{
|
||||
glsl_type_singleton_init_or_ref();
|
||||
|
||||
static const nir_shader_compiler_options options = { };
|
||||
static nir_shader_compiler_options options = { };
|
||||
|
||||
options.max_unroll_iterations = 32;
|
||||
|
||||
b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, &options,
|
||||
"loop analyze");
|
||||
}
|
||||
@@ -917,6 +920,7 @@ COMPARE_REVERSE(ilt)
|
||||
COMPARE_REVERSE(ige)
|
||||
COMPARE_REVERSE(ult)
|
||||
COMPARE_REVERSE(uge)
|
||||
COMPARE_REVERSE(ishl)
|
||||
|
||||
#define INOT_COMPARE(comp) \
|
||||
static nir_ssa_def * \
|
||||
@@ -1195,7 +1199,7 @@ INFINITE_LOOP_UNKNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000003, 0x40000000, uge,
|
||||
* i >>= 1;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x80000000, 0x80000000, 0x00000001, ine, ushr)
|
||||
KNOWN_COUNT_TEST(0x80000000, 0x80000000, 0x00000001, ine, ushr, 1)
|
||||
|
||||
/* uint i = 0x80000000;
|
||||
* while (true) {
|
||||
@@ -1205,7 +1209,7 @@ UNKNOWN_COUNT_TEST(0x80000000, 0x80000000, 0x00000001, ine, ushr)
|
||||
* i >>= 1;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x80000000, 0x00000000, 0x00000001, ieq, ushr)
|
||||
KNOWN_COUNT_TEST(0x80000000, 0x00000000, 0x00000001, ieq, ushr, 32)
|
||||
|
||||
/* uint i = 0x80000000;
|
||||
* while (true) {
|
||||
@@ -1215,7 +1219,7 @@ UNKNOWN_COUNT_TEST(0x80000000, 0x00000000, 0x00000001, ieq, ushr)
|
||||
* i >>= 1;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x80000000, 0x00000002, 0x00000001, ult, ushr)
|
||||
KNOWN_COUNT_TEST(0x80000000, 0x00000002, 0x00000001, ult, ushr, 31)
|
||||
|
||||
/* uint i = 0x80000000;
|
||||
* while (true) {
|
||||
@@ -1245,7 +1249,7 @@ KNOWN_COUNT_TEST(0x80000000, 0x80000000, 0x00000001, uge, ushr, 0)
|
||||
* i >>= 1;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x80000000, 0x00008000, 0x00000001, uge_rev, ushr)
|
||||
KNOWN_COUNT_TEST(0x80000000, 0x00008000, 0x00000001, uge_rev, ushr, 16)
|
||||
|
||||
/* uint i = 0x80000000;
|
||||
* while (true) {
|
||||
@@ -1265,7 +1269,7 @@ KNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0x80000000, ine, ushr, 0)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0x00000000, ieq, ushr)
|
||||
KNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0x00000000, ieq, ushr, 31)
|
||||
|
||||
/* uint i = 0x80000000;
|
||||
* while (true) {
|
||||
@@ -1305,7 +1309,7 @@ KNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0x00000000, uge, ushr, 0)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0x00000008, uge_rev, ushr)
|
||||
KNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0x00000008, uge_rev, ushr, 27)
|
||||
|
||||
/* int i = 0xffffffff;
|
||||
* while (true) {
|
||||
@@ -1445,7 +1449,7 @@ KNOWN_COUNT_TEST(0x7fffffff, 0x00000000, 0x00000001, ine, ishr, 0)
|
||||
* i >>= 1;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x40000000, 0x00000001, 0x00000001, ieq, ishr)
|
||||
KNOWN_COUNT_TEST(0x40000000, 0x00000001, 0x00000001, ieq, ishr, 30)
|
||||
|
||||
/* int i = 0x7fffffff;
|
||||
* while (true) {
|
||||
@@ -1455,7 +1459,7 @@ UNKNOWN_COUNT_TEST(0x40000000, 0x00000001, 0x00000001, ieq, ishr)
|
||||
* i >>= 1;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x7fffffff, 0x00000001, 0x00000001, ilt, ishr)
|
||||
KNOWN_COUNT_TEST(0x7fffffff, 0x00000001, 0x00000001, ilt, ishr, 31)
|
||||
|
||||
/* int i = 0x80000000;
|
||||
* while (true) {
|
||||
@@ -1465,7 +1469,7 @@ UNKNOWN_COUNT_TEST(0x7fffffff, 0x00000001, 0x00000001, ilt, ishr)
|
||||
* i >>= 1;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x80000000, 0xffff0000, 0x00000001, ilt_rev, ishr)
|
||||
KNOWN_COUNT_TEST(0x80000000, 0xffff0000, 0x00000001, ilt_rev, ishr, 16)
|
||||
|
||||
/* int i = 0x80000000;
|
||||
* while (true) {
|
||||
@@ -1475,7 +1479,7 @@ UNKNOWN_COUNT_TEST(0x80000000, 0xffff0000, 0x00000001, ilt_rev, ishr)
|
||||
* i >>= 1;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x80000000, 0xffffffff, 0x00000001, ige, ishr)
|
||||
KNOWN_COUNT_TEST(0x80000000, 0xffffffff, 0x00000001, ige, ishr, 31)
|
||||
|
||||
/* int i = 0x12345678;
|
||||
* while (true) {
|
||||
@@ -1485,7 +1489,7 @@ UNKNOWN_COUNT_TEST(0x80000000, 0xffffffff, 0x00000001, ige, ishr)
|
||||
* i >>= 4;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x12345678, 0x00000001, 0x00000004, ige_rev, ishr)
|
||||
KNOWN_COUNT_TEST(0x12345678, 0x00000001, 0x00000004, ige_rev, ishr, 7)
|
||||
|
||||
/* int i = 0x7fffffff;
|
||||
* while (true) {
|
||||
@@ -1505,7 +1509,7 @@ KNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000000, ine, ishr, 0)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000000, ieq, ishr)
|
||||
KNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000000, ieq, ishr, 30)
|
||||
|
||||
/* int i = 0x7fffffff;
|
||||
* while (true) {
|
||||
@@ -1515,7 +1519,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000000, ieq, ishr)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000001, ilt, ishr)
|
||||
KNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000001, ilt, ishr, 30)
|
||||
|
||||
/* int i = 0x80000000;
|
||||
* while (true) {
|
||||
@@ -1525,7 +1529,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000001, ilt, ishr)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0xfffffffe, ilt_rev, ishr)
|
||||
KNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0xfffffffe, ilt_rev, ishr, 30)
|
||||
|
||||
/* int i = 0xbfffffff;
|
||||
* while (true) {
|
||||
@@ -1535,7 +1539,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0xfffffffe, ilt_rev, ishr)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0xbfffffff, 0x00000001, 0xfffffffe, ige, ishr)
|
||||
KNOWN_COUNT_TEST_INVERT(0xbfffffff, 0x00000001, 0xfffffffe, ige, ishr, 29)
|
||||
|
||||
/* int i = 0x7fffffff;
|
||||
* while (true) {
|
||||
@@ -1545,7 +1549,7 @@ UNKNOWN_COUNT_TEST_INVERT(0xbfffffff, 0x00000001, 0xfffffffe, ige, ishr)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000002, ige_rev, ishr)
|
||||
KNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000002, ige_rev, ishr, 29)
|
||||
|
||||
/* int i = 0;
|
||||
* while (true) {
|
||||
@@ -1675,7 +1679,7 @@ INFINITE_LOOP_UNKNOWN_COUNT_TEST_INVERT(0x77777777, 0x00000004, 0xffffffff, ige_
|
||||
* i <<= 1;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ine, ishl)
|
||||
KNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ine, ishl, 1)
|
||||
|
||||
/* int i = 1;
|
||||
* while (true) {
|
||||
@@ -1685,7 +1689,17 @@ UNKNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ine, ishl)
|
||||
* i <<= 4;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x00000001, 0x00001000, 0x00000004, ieq, ishl)
|
||||
KNOWN_COUNT_TEST(0x00000001, 0x00001000, 0x00000004, ieq, ishl, 3)
|
||||
|
||||
/* uint i = 1;
|
||||
* while (true) {
|
||||
* if (i < 1)
|
||||
* break;
|
||||
*
|
||||
* i <<= 1;
|
||||
* }
|
||||
*/
|
||||
KNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ult, ishl, 32)
|
||||
|
||||
/* int i = 1;
|
||||
* while (true) {
|
||||
@@ -1695,7 +1709,7 @@ UNKNOWN_COUNT_TEST(0x00000001, 0x00001000, 0x00000004, ieq, ishl)
|
||||
* i <<= 1;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ilt, ishl)
|
||||
KNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ilt, ishl, 31)
|
||||
|
||||
/* int i = 0xffff0000;
|
||||
* while (true) {
|
||||
@@ -1705,7 +1719,7 @@ UNKNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ilt, ishl)
|
||||
* i <<= 2;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0xffff0000, 0xffffffff, 0x00000002, ilt_rev, ishl)
|
||||
KNOWN_COUNT_TEST(0xffff0000, 0xffffffff, 0x00000002, ilt_rev, ishl, 8)
|
||||
|
||||
/* int i = 0xf;
|
||||
* while (true) {
|
||||
@@ -1715,7 +1729,7 @@ UNKNOWN_COUNT_TEST(0xffff0000, 0xffffffff, 0x00000002, ilt_rev, ishl)
|
||||
* i <<= 3;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x0000000f, 0x0000ffff, 0x00000003, ige, ishl)
|
||||
KNOWN_COUNT_TEST(0x0000000f, 0x0000ffff, 0x00000003, ige, ishl, 5)
|
||||
|
||||
/* int i = 0x0000000f;
|
||||
* while (true) {
|
||||
@@ -1725,7 +1739,7 @@ UNKNOWN_COUNT_TEST(0x0000000f, 0x0000ffff, 0x00000003, ige, ishl)
|
||||
* i <<= 4;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x0000000f, 0xfffd0000, 0x00000004, ige_rev, ishl)
|
||||
KNOWN_COUNT_TEST(0x0000000f, 0xfffd0000, 0x00000004, ige_rev, ishl, 7)
|
||||
|
||||
/* int i = 1;
|
||||
* while (true) {
|
||||
@@ -1735,7 +1749,7 @@ UNKNOWN_COUNT_TEST(0x0000000f, 0xfffd0000, 0x00000004, ige_rev, ishl)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x00000001, 0x00000001, 0x00000002, ine, ishl)
|
||||
KNOWN_COUNT_TEST_INVERT(0x00000001, 0x00000001, 0x00000002, ine, ishl, 1)
|
||||
|
||||
/* int i = 1;
|
||||
* while (true) {
|
||||
@@ -1745,7 +1759,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x00000001, 0x00000001, 0x00000002, ine, ishl)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x00000001, 0x00000008, 0x01000000, ieq, ishl)
|
||||
KNOWN_COUNT_TEST_INVERT(0x00000001, 0x00000008, 0x01000000, ieq, ishl, 2)
|
||||
|
||||
/* int i = 0x7fffffff;
|
||||
* while (true) {
|
||||
@@ -1765,7 +1779,7 @@ KNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000001, ilt, ishl, 0)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x00007fff, 0x00000002, 0x1fffffff, ilt_rev, ishl)
|
||||
KNOWN_COUNT_TEST_INVERT(0x00007fff, 0x00000002, 0x1fffffff, ilt_rev, ishl, 7)
|
||||
|
||||
/* int i = 0xffff7fff;
|
||||
* while (true) {
|
||||
@@ -1775,7 +1789,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x00007fff, 0x00000002, 0x1fffffff, ilt_rev, ishl)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0xffff7fff, 0x00000004, 0xfffffffe, ige, ishl)
|
||||
KNOWN_COUNT_TEST_INVERT(0xffff7fff, 0x00000004, 0xfffffffe, ige, ishl, 3)
|
||||
|
||||
/* int i = 0x0000f0f0;
|
||||
* while (true) {
|
||||
@@ -1785,7 +1799,21 @@ UNKNOWN_COUNT_TEST_INVERT(0xffff7fff, 0x00000004, 0xfffffffe, ige, ishl)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x0000f0f0, 0x00000004, 0xfffffffe, ige_rev, ishl)
|
||||
KNOWN_COUNT_TEST_INVERT(0x0000f0f0, 0x00000004, 0xfffffffe, ige_rev, ishl, 3)
|
||||
|
||||
/* This infinite loop makes no sense, but it's a good test to make sure the
|
||||
* loop analysis code doesn't incorrectly treat left-shift as a commutative
|
||||
* operation.
|
||||
*
|
||||
* int i = 1;
|
||||
* while (true) {
|
||||
* if (i == 0)
|
||||
* break;
|
||||
*
|
||||
* i = 1 << i;
|
||||
* }
|
||||
*/
|
||||
INFINITE_LOOP_UNKNOWN_COUNT_TEST(0x00000001, 0x00000000, 0x00000001, ieq, ishl_rev)
|
||||
|
||||
/* int i = 0;
|
||||
* while (true) {
|
||||
@@ -1882,7 +1910,7 @@ INFINITE_LOOP_UNKNOWN_COUNT_TEST_INVERT(0x88888888, 0x00000010, 0x00000001, ige,
|
||||
* i = i * 7;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000007, ine, imul)
|
||||
KNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000007, ine, imul, 1)
|
||||
|
||||
/* int i = 2;
|
||||
* while (true) {
|
||||
@@ -1892,7 +1920,7 @@ UNKNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000007, ine, imul)
|
||||
* i = i * 3;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x00000002, 0x00000036, 0x00000003, ieq, imul)
|
||||
KNOWN_COUNT_TEST(0x00000002, 0x00000036, 0x00000003, ieq, imul, 3)
|
||||
|
||||
/* int i = 5;
|
||||
* while (true) {
|
||||
@@ -1902,7 +1930,7 @@ UNKNOWN_COUNT_TEST(0x00000002, 0x00000036, 0x00000003, ieq, imul)
|
||||
* i = i * -3;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x00000005, 0x00000001, 0xfffffffd, ilt, imul)
|
||||
KNOWN_COUNT_TEST(0x00000005, 0x00000001, 0xfffffffd, ilt, imul, 1)
|
||||
|
||||
/* int i = 0xf;
|
||||
* while (true) {
|
||||
@@ -1912,7 +1940,7 @@ UNKNOWN_COUNT_TEST(0x00000005, 0x00000001, 0xfffffffd, ilt, imul)
|
||||
* i = i * 11;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST(0x0000000f, 0x0000ffff, 0x0000000b, ige, imul)
|
||||
KNOWN_COUNT_TEST(0x0000000f, 0x0000ffff, 0x0000000b, ige, imul, 4)
|
||||
|
||||
/* int i = 3;
|
||||
* while (true) {
|
||||
@@ -1922,7 +1950,7 @@ UNKNOWN_COUNT_TEST(0x0000000f, 0x0000ffff, 0x0000000b, ige, imul)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x00000003, 0xfffffffb, 0xfffffff1, ine, imul)
|
||||
KNOWN_COUNT_TEST_INVERT(0x00000003, 0xfffffffb, 0xfffffff1, ine, imul, 1)
|
||||
|
||||
/* int i = 3;
|
||||
* while (true) {
|
||||
@@ -1932,7 +1960,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x00000003, 0xfffffffb, 0xfffffff1, ine, imul)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x00000003, 0xfffffff9, 0x000562b3, ieq, imul)
|
||||
KNOWN_COUNT_TEST_INVERT(0x00000003, 0xfffffff9, 0x000562b3, ieq, imul, 5)
|
||||
|
||||
/* int i = 0x7f;
|
||||
* while (true) {
|
||||
@@ -1942,7 +1970,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x00000003, 0xfffffff9, 0x000562b3, ieq, imul)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0x0000007f, 0x00000003, 0x00000001, ilt, imul)
|
||||
KNOWN_COUNT_TEST_INVERT(0x0000007f, 0x00000003, 0x00000001, ilt, imul, 16)
|
||||
|
||||
/* int i = 0xffff7fff;
|
||||
* while (true) {
|
||||
@@ -1952,4 +1980,4 @@ UNKNOWN_COUNT_TEST_INVERT(0x0000007f, 0x00000003, 0x00000001, ilt, imul)
|
||||
* break;
|
||||
* }
|
||||
*/
|
||||
UNKNOWN_COUNT_TEST_INVERT(0xffff7fff, 0x0000000f, 0x34cce9b0, ige, imul)
|
||||
KNOWN_COUNT_TEST_INVERT(0xffff7fff, 0x0000000f, 0x34cce9b0, ige, imul, 4)
|
||||
|
180
src/compiler/nir/tests/loop_unroll_tests.cpp
Normal file
180
src/compiler/nir/tests/loop_unroll_tests.cpp
Normal file
@@ -0,0 +1,180 @@
|
||||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "nir_phi_builder.h"
|
||||
|
||||
/*
 * Define a nir_loop_unroll_test case named _label.
 *
 * The test creates the immediates _init, _limit and _step with
 * nir_imm_<_type>(), builds a loop from them with loop_unroll_test_helper()
 * using nir_<_cond> as the exit condition builder and nir_<_incr> as the
 * increment builder (_rev is forwarded as the helper's "reverse" argument),
 * and then runs nir_opt_loop_unroll() on the shader.
 *
 * _exp_res          suffix of the EXPECT_ macro applied to the pass result
 *                   (the tests in this file use TRUE).
 * _exp_instr_count  expected number of nir_op_<_incr> instructions left in
 *                   the shader after the pass.
 * _exp_loop_count   expected result of count_loops() after the pass.
 */
#define UNROLL_TEST_INSERT(_label, _type, _init, _limit, _step, \
|
||||
_cond, _incr, _rev, _exp_res, \
|
||||
_exp_instr_count, _exp_loop_count) \
|
||||
TEST_F(nir_loop_unroll_test, _label) \
|
||||
{ \
|
||||
nir_ssa_def *init = nir_imm_##_type(&bld, _init); \
|
||||
nir_ssa_def *limit = nir_imm_##_type(&bld, _limit); \
|
||||
nir_ssa_def *step = nir_imm_##_type(&bld, _step); \
|
||||
loop_unroll_test_helper(&bld, init, limit, step, \
|
||||
&nir_##_cond, &nir_##_incr, _rev); \
|
||||
EXPECT_##_exp_res(nir_opt_loop_unroll(bld.shader)); \
|
||||
EXPECT_EQ(_exp_instr_count, count_instr(nir_op_##_incr)); \
|
||||
EXPECT_EQ(_exp_loop_count, count_loops()); \
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/* Test fixture for the loop unrolling tests.  Each test gets its own
 * nir_builder ("bld") whose shader is created in the constructor and freed
 * in the destructor.
 */
class nir_loop_unroll_test : public ::testing::Test {
|
||||
protected:
|
||||
/* Create a vertex shader named "loop unrolling tests" with compiler
 * options that allow up to 32 unrolled iterations.
 */
nir_loop_unroll_test()
|
||||
{
|
||||
/* Paired with glsl_type_singleton_decref() in the destructor. */
glsl_type_singleton_init_or_ref();
|
||||
/* Static, so the same options object is shared (and re-initialized to
 * the same values) by every fixture instance.
 */
static nir_shader_compiler_options options = { };
|
||||
options.max_unroll_iterations = 32;
|
||||
options.force_indirect_unrolling_sampler = false;
|
||||
options.force_indirect_unrolling = nir_var_all;
|
||||
bld = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, &options,
|
||||
"loop unrolling tests");
|
||||
}
|
||||
/* Free the shader created in the constructor. */
~nir_loop_unroll_test()
|
||||
{
|
||||
ralloc_free(bld.shader);
|
||||
glsl_type_singleton_decref();
|
||||
}
|
||||
|
||||
/* Number of ALU instructions with opcode "op" in bld.impl. */
int count_instr(nir_op op);
|
||||
/* Number of loop nodes directly in the body of bld.impl. */
int count_loops(void);
|
||||
|
||||
/* Builder (and, through it, the shader) used by the test body. */
nir_builder bld;
|
||||
};
|
||||
|
||||
} /* namespace */
|
||||
|
||||
int
|
||||
nir_loop_unroll_test::count_instr(nir_op op)
|
||||
{
|
||||
int count = 0;
|
||||
nir_foreach_block(block, bld.impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
continue;
|
||||
nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
|
||||
if (alu_instr->op == op)
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
int
|
||||
nir_loop_unroll_test::count_loops(void)
|
||||
{
|
||||
int count = 0;
|
||||
foreach_list_typed(nir_cf_node, cf_node, node, &bld.impl->body) {
|
||||
if (cf_node->type == nir_cf_node_loop)
|
||||
count++;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
void
|
||||
loop_unroll_test_helper(nir_builder *bld, nir_ssa_def *init,
|
||||
nir_ssa_def *limit, nir_ssa_def *step,
|
||||
nir_ssa_def* (*cond_instr)(nir_builder*,
|
||||
nir_ssa_def*,
|
||||
nir_ssa_def*),
|
||||
nir_ssa_def* (*incr_instr)(nir_builder*,
|
||||
nir_ssa_def*,
|
||||
nir_ssa_def*),
|
||||
bool reverse)
|
||||
{
|
||||
nir_loop *loop = nir_push_loop(bld);
|
||||
|
||||
nir_block *top_block =
|
||||
nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
|
||||
nir_block *head_block = nir_loop_first_block(loop);
|
||||
|
||||
nir_phi_instr *phi = nir_phi_instr_create(bld->shader);
|
||||
nir_ssa_dest_init(&phi->instr, &phi->dest, 1, 32, NULL);
|
||||
|
||||
nir_phi_instr_add_src(phi, top_block, nir_src_for_ssa(init));
|
||||
|
||||
nir_ssa_def *cond = cond_instr(bld,
|
||||
(reverse ? limit : &phi->dest.ssa),
|
||||
(reverse ? &phi->dest.ssa : limit));
|
||||
|
||||
nir_if *nif = nir_push_if(bld, cond);
|
||||
nir_jump(bld, nir_jump_break);
|
||||
nir_pop_if(bld, nif);
|
||||
|
||||
nir_ssa_def *var = incr_instr(bld, &phi->dest.ssa, step);
|
||||
|
||||
nir_phi_instr_add_src(phi, nir_cursor_current_block(bld->cursor),
|
||||
nir_src_for_ssa(var));
|
||||
|
||||
nir_pop_loop(bld, loop);
|
||||
|
||||
bld->cursor = nir_after_phis(head_block);
|
||||
nir_builder_instr_insert(bld, &phi->instr);
|
||||
|
||||
nir_validate_shader(bld->shader, NULL);
|
||||
}
|
||||
|
||||
/* Test cases.  Arguments are: label, immediate type, init, limit, step,
 * exit condition, increment, reverse, expected pass result, expected number
 * of increment instructions after unrolling, expected number of loops.
 *
 * With reverse == false the loop breaks when cond(i, limit) is true; with
 * reverse == true it breaks when cond(limit, i) is true.
 */

/* i = 0; break if (i >= 24); i += 4  ->  6 additions. */
UNROLL_TEST_INSERT(iadd, int, 0, 24, 4,
|
||||
ige, iadd, false, TRUE, 6, 0)
|
||||
/* i = 0; break if (24 < i); i += 4  ->  7 additions. */
UNROLL_TEST_INSERT(iadd_rev, int, 0, 24, 4,
|
||||
ilt, iadd, true, TRUE, 7, 0)
|
||||
/* Floating point versions of the two tests above; not built on s390x. */
#ifndef __s390x__
|
||||
UNROLL_TEST_INSERT(fadd, float, 0.0, 24.0, 4.0,
|
||||
fge, fadd, false, TRUE, 6, 0)
|
||||
UNROLL_TEST_INSERT(fadd_rev, float, 0.0, 24.0, 4.0,
|
||||
flt, fadd, true, TRUE, 7, 0)
|
||||
#endif
|
||||
/* i = 1; break if (i >= 81); i *= 3  ->  4 multiplications. */
UNROLL_TEST_INSERT(imul, int, 1, 81, 3,
|
||||
ige, imul, false, TRUE, 4, 0)
|
||||
/* i = 1; break if (81 < i); i *= 3  ->  5 multiplications. */
UNROLL_TEST_INSERT(imul_rev, int, 1, 81, 3,
|
||||
ilt, imul, true, TRUE, 5, 0)
|
||||
#if 0 /* Disable tests until support is re-enabled in loop_analyze. */
|
||||
UNROLL_TEST_INSERT(fmul, float, 1.5, 81.0, 3.0,
|
||||
fge, fmul, false, TRUE, 4, 0)
|
||||
UNROLL_TEST_INSERT(fmul_rev, float, 1.0, 81.0, 3.0,
|
||||
flt, fmul, true, TRUE, 5, 0)
|
||||
#endif
|
||||
/* i = 1; break if (i >= 128); i <<= 1  ->  7 shifts. */
UNROLL_TEST_INSERT(ishl, int, 1, 128, 1,
|
||||
ige, ishl, false, TRUE, 7, 0)
|
||||
/* i = 1; break if (128 < i); i <<= 1  ->  8 shifts. */
UNROLL_TEST_INSERT(ishl_rev, int, 1, 128, 1,
|
||||
ilt, ishl, true, TRUE, 8, 0)
|
||||
/* i = 64; break if (i < 4); i >>= 1  ->  5 shifts. */
UNROLL_TEST_INSERT(ishr, int, 64, 4, 1,
|
||||
ilt, ishr, false, TRUE, 5, 0)
|
||||
/* i = 64; break if (4 >= i); i >>= 1  ->  4 shifts. */
UNROLL_TEST_INSERT(ishr_rev, int, 64, 4, 1,
|
||||
ige, ishr, true, TRUE, 4, 0)
|
||||
/* Same as the two ishr tests above, using a logical right shift. */
UNROLL_TEST_INSERT(ushr, int, 64, 4, 1,
|
||||
ilt, ushr, false, TRUE, 5, 0)
|
||||
UNROLL_TEST_INSERT(ushr_rev, int, 64, 4, 1,
|
||||
ige, ushr, true, TRUE, 4, 0)
|
||||
|
||||
/* Left shift of a negative start value: 0xf0f0f0f0 stays negative for three
 * shifts and becomes 0x0f0f0f00 after the fourth, at which point both
 * (i >= 0) and (0 < i) are true  ->  4 shifts in either form.
 */
UNROLL_TEST_INSERT(lshl_neg, int, 0xf0f0f0f0, 0, 1,
|
||||
ige, ishl, false, TRUE, 4, 0)
|
||||
UNROLL_TEST_INSERT(lshl_neg_rev, int, 0xf0f0f0f0, 0, 1,
|
||||
ilt, ishl, true, TRUE, 4, 0)
|
Reference in New Issue
Block a user