aco/tests: add tests for VALUReadSGPRHazard
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30478>
This commit is contained in:
@@ -1505,6 +1505,196 @@ BEGIN_TEST(insert_nops.export_priority.set_prio)
|
||||
finish_insert_nops_test();
|
||||
END_TEST
|
||||
|
||||
/* VALUReadSGPRHazard on GFX12: checks in which instruction sequences a
 * "s_waitcnt_depctr sa_sdst(0)" is expected between a SALU write of an SGPR
 * and a later read of that SGPR, and in which sequences none is expected.
 * Each case starts with a p_unit_test marker; the //! lines before the bld.*
 * calls list the expected instruction stream for that case.
 */
BEGIN_TEST(insert_nops.valu_read_sgpr.basic)
|
||||
if (!setup_cs(NULL, GFX12))
|
||||
return;
|
||||
|
||||
/* VALU reads of s4, s7, null, exec_lo, m0 and scc, emitted before the first test case.
 * The cases below write and re-read these registers (s16 is deliberately not read here).
 */
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(7), s1));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(sgpr_null, s1));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(exec_lo, s1));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(m0, s1));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(scc, s1));
|
||||
|
||||
/* no hazard: SALU write missing (s4 is only read, never written, in this case) */
|
||||
//>> p_unit_test 0
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
|
||||
|
||||
/* no hazard: SGPR never read by VALU (s16 is not among the v_mov_b32 sources above) */
|
||||
//! p_unit_test 1
|
||||
//! s1: %0:s[16] = s_mov_b32 0
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:s[16]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(16), s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(16), s1));
|
||||
|
||||
/* basic case: SALU read (s4 is written by a SALU and then read by a SALU) */
|
||||
//! p_unit_test 2
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
|
||||
|
||||
/* basic case again: VALU reads never expire (same sequence and expectation as p_unit_test 2) */
|
||||
//! p_unit_test 3
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
|
||||
|
||||
/* sa_sdst(0) resolves the hazard: only one wait is expected although s4 is read twice */
|
||||
//! p_unit_test 4
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
|
||||
|
||||
/* the input already contains s_waitcnt_depctr 0xfffe (printed as sa_sdst(0) in the
 * expected output); exactly one wait is expected, i.e. no second one is added */
//! p_unit_test 5
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
bld.sopp(aco_opcode::s_waitcnt_depctr, 0xfffe);
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
|
||||
|
||||
/* basic case: VALU read (s4 is written by a SALU and then read by a VALU) */
|
||||
//! p_unit_test 6
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
|
||||
|
||||
/* the SALU write is in the same SGPR pair as the VALU read:
 * s6 is written here, while the v_mov_b32 at the top of the test read s7 */
|
||||
//! p_unit_test 7
|
||||
//! s1: %0:s[6] = s_mov_b32 0
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:s[6]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(6), s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(6), s1));
|
||||
|
||||
/* no hazard: these registers are not problematic
 * (null, exec_lo, m0 and scc in p_unit_test 8-11; no wait is expected for any of them) */
|
||||
//! p_unit_test 8
|
||||
//! s1: %0:null = s_mov_b32 0
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:null
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(sgpr_null, s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(sgpr_null, s1));
|
||||
|
||||
//! p_unit_test 9
|
||||
//! s1: %0:exec_lo = s_mov_b32 0
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:exec_lo
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(exec_lo, s1));
|
||||
|
||||
//! p_unit_test 10
|
||||
//! s1: %0:m0 = s_mov_b32 0
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:m0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(m0, s1));
|
||||
|
||||
//! p_unit_test 11
|
||||
//! s1: %0:scc = s_cmp_lg_i32 0, 0
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:scc
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11));
|
||||
bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand::zero(4), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(scc, s1));
|
||||
|
||||
/* 11 SALU between the write and a VALU read expire the hazard */
|
||||
//! p_unit_test 12
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//; for i in range(11): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
|
||||
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
for (unsigned i = 0; i < 11; i++)
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
|
||||
|
||||
/* counterpart of p_unit_test 12 with only 10 SALU in between: the wait is still expected */
//! p_unit_test 13
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//; for i in range(10): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
for (unsigned i = 0; i < 10; i++)
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
|
||||
|
||||
/* 10 SALU between the write and a SALU read expire the hazard */
|
||||
//! p_unit_test 14
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//; for i in range(10): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
for (unsigned i = 0; i < 10; i++)
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
|
||||
|
||||
/* counterpart of p_unit_test 14 with only 9 SALU in between: the wait is still expected */
//! p_unit_test 15
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//; for i in range(9): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
for (unsigned i = 0; i < 9; i++)
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
|
||||
|
||||
/* SOPP in-between the write and the read do not count:
 * 9 s_mov_b32 plus one s_nop still expect the wait, unlike the 10 s_mov_b32 of p_unit_test 14 */
|
||||
//! p_unit_test 16
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//; for i in range(9): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
|
||||
//! s_nop
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
for (unsigned i = 0; i < 9; i++)
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
|
||||
bld.sopp(aco_opcode::s_nop, 0);
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
|
||||
|
||||
finish_insert_nops_test();
|
||||
END_TEST
|
||||
|
||||
/* VALUReadSGPRHazard on GFX12 with program->stage set to raytracing_cs.
 * The test emits no VALU instruction itself, yet a wait is expected between
 * the SALU write of s4 and the SALU read of s4.
 */
BEGIN_TEST(insert_nops.valu_read_sgpr.previous_part)
|
||||
if (!setup_cs(NULL, GFX12))
|
||||
return;
|
||||
|
||||
/* Raytracing shaders have a prolog and may also be split into several parts.
 * NOTE(review): presumably an earlier part or the prolog may already have read
 * the SGPR with a VALU, which is why the wait is expected below - confirm against the pass. */
|
||||
program->stage = raytracing_cs;
|
||||
|
||||
/* Despite the SGPR never being read by a VALU in this shader, a sa_sdst(0) is needed. */
|
||||
//>> p_unit_test 0
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//! s_waitcnt_depctr sa_sdst(0)
|
||||
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
|
||||
|
||||
finish_insert_nops_test();
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(insert_nops.setpc_gfx6)
|
||||
if (!setup_cs(NULL, GFX6))
|
||||
return;
|
||||
@@ -1894,5 +2084,42 @@ BEGIN_TEST(insert_nops.setpc_gfx12)
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1));
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
|
||||
|
||||
/* VALUReadSGPRHazard */
|
||||
//! p_unit_test 4
|
||||
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//! s_waitcnt_depctr va_vdst(0) sa_sdst(0)
|
||||
//! s_setpc_b64 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
|
||||
|
||||
//! p_unit_test 5
|
||||
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//; for i in range(10): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
|
||||
//! s_waitcnt_depctr va_vdst(0)
|
||||
//! s_setpc_b64 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
for (unsigned i = 0; i < 10; i++) /* the s_setpc_b64 counts */
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
|
||||
|
||||
//! p_unit_test 6
|
||||
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
|
||||
//! s1: %0:s[4] = s_mov_b32 0
|
||||
//; for i in range(9): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
|
||||
//! s_waitcnt_depctr va_vdst(0) sa_sdst(0)
|
||||
//! s_setpc_b64 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
|
||||
for (unsigned i = 0; i < 9; i++)
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
|
||||
|
||||
finish_insert_nops_test(true);
|
||||
END_TEST
|
||||
|
Reference in New Issue
Block a user