From 2bdffe7eb212ddc10f2ed9ef51095886a55109b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Fri, 13 May 2022 09:11:27 +0200 Subject: [PATCH] r300: be less agresive with copy propagate in loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When there are multiple MOVs with the same destination in loop in different branches and some readers after the loop, we would now errorneously copy propagate the last MOV, like in the following snippet: BGNLOOP; ... IF temp[3].x___; MOV temp[2], const[1].yxxy; BRK; ENDIF; IF temp[4].x___; MOV temp[2], const[1].xyxy; BRK; ENDIF; ... MOV temp[2], const[1].xyxy; ENDLOOP; ADD_SAT temp[0], temp[2], temp[1]; into: BGNLOOP; ... IF temp[3].x___; MOV temp[2], const[1].yxxy; BRK; ENDIF; IF temp[3].y___; MOV temp[2], const[1].xyxy; BRK; ENDIF; ... ENDLOOP; ADD_SAT temp[0], const[1].xyxy, temp[1]; We need the copy propagate just for simple cleanups after ttn, anything more complex should have been handled already in NIR. So just bail out if any of the readers is after the loop. No changes in shader-db. Fixes few piglit tests when loop unrolling is disabled: spec@glsl-1.10@execution@vs-loop-complex-unroll spec@glsl-1.10@execution@vs-loop-complex-unroll-nested-break spec@glsl-1.10@execution@vs-loop-complex-unroll-with-else-break Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6467 Signed-off-by: Pavel Ondračka Reviewed-by: Filip Gawin Part-of: --- src/gallium/drivers/r300/compiler/radeon_dataflow.c | 10 ++++++++++ src/gallium/drivers/r300/compiler/radeon_dataflow.h | 3 +++ src/gallium/drivers/r300/compiler/radeon_optimize.c | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.c b/src/gallium/drivers/r300/compiler/radeon_dataflow.c index 302576b9fed..c0acb3ee681 100644 --- a/src/gallium/drivers/r300/compiler/radeon_dataflow.c +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.c @@ -688,6 +688,7 @@ static void get_readers_for_single_write( unsigned int branch_depth = 0; struct rc_instruction * endloop = NULL; unsigned int abort_on_read_at_endloop = 0; + int readers_before_endloop = -1; struct get_readers_callback_data * d = userdata; d->ReaderData->Writer = writer; @@ -695,6 +696,7 @@ static void get_readers_for_single_write( d->ReaderData->AbortOnWrite = 0; d->ReaderData->LoopDepth = 0; d->ReaderData->InElse = 0; + d->ReaderData->ReadersAfterEndloop = false; d->DstFile = dst_file; d->DstIndex = dst_index; d->DstMask = dst_mask; @@ -780,11 +782,19 @@ static void get_readers_for_single_write( get_readers_pair_read_callback, d); } + /* Writer was in loop and we have some readers after it. + * Set a flag so we can be extra careful in copy propagate. + */ + if (readers_before_endloop != -1 && + d->ReaderData->ReaderCount > readers_before_endloop) + d->ReaderData->ReadersAfterEndloop = true; + /* This can happen when we jump from an ENDLOOP to BGNLOOP */ if (tmp == writer) { tmp = endloop; endloop = NULL; d->ReaderData->AbortOnRead = abort_on_read_at_endloop; + readers_before_endloop = d->ReaderData->ReaderCount; continue; } rc_for_all_writes_mask(tmp, get_readers_write_callback, d); diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h b/src/gallium/drivers/r300/compiler/radeon_dataflow.h index bb8d48206e2..ef8e2bfe851 100644 --- a/src/gallium/drivers/r300/compiler/radeon_dataflow.h +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.h @@ -31,6 +31,8 @@ #include "radeon_program_constants.h" +#include + struct radeon_compiler; struct rc_instruction; struct rc_swizzle_caps; @@ -91,6 +93,7 @@ struct rc_reader_data { unsigned int AbortOnWrite; unsigned int LoopDepth; unsigned int InElse; + bool ReadersAfterEndloop; struct rc_instruction * Writer; unsigned int ReaderCount; diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c index a7e9445ce11..5a9f4a529df 100644 --- a/src/gallium/drivers/r300/compiler/radeon_optimize.c +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -154,7 +154,7 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i copy_propagate_scan_read, NULL, is_src_clobbered_scan_write); - if (reader_data.Abort || reader_data.ReaderCount == 0) + if (reader_data.Abort || reader_data.ReaderCount == 0 || reader_data.ReadersAfterEndloop) return; /* We can propagate SaturateMode if all the readers are MOV instructions