aco/tests: add tests for VALUReadSGPRHazard

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30478>
This commit is contained in:
Rhys Perry
2024-08-01 14:46:45 +01:00
committed by Marge Bot
parent 47e0f468cf
commit 4579586c66

View File

@@ -1505,6 +1505,196 @@ BEGIN_TEST(insert_nops.export_priority.set_prio)
finish_insert_nops_test();
END_TEST
/* Tests the VALU-read-SGPR hazard handling on GFX12: an SGPR that was read by
 * a VALU instruction and is later written by a SALU instruction needs an
 * "s_waitcnt_depctr sa_sdst(0)" before the written value is read again, unless
 * enough SALU instructions were issued in between.
 *
 * Layout: a preamble of VALU reads marks a set of SGPRs, then each sub-case is
 * introduced by a numbered p_unit_test marker followed by the instructions
 * under test.
 *
 * NOTE(review): the "//>>", "//!" and "//;" comment lines look like
 * expected-output directives consumed by the test harness rather than ordinary
 * comments - presumably they must stay in sync with the builder calls below;
 * confirm against the test framework before editing them.
 */
BEGIN_TEST(insert_nops.valu_read_sgpr.basic)
/* Nothing to test if a GFX12 compute-shader program cannot be set up. */
if (!setup_cs(NULL, GFX12))
return;
/* Preamble: VALU reads of s[4], s[7], null, exec_lo, m0 and scc. Every later
 * sub-case refers back to these reads; none of them is repeated per sub-case.
 * PhysReg(256) is the register that the expected output prints as v[0]. */
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(7), s1));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(sgpr_null, s1));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(exec_lo, s1));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(m0, s1));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(scc, s1));
/* no hazard: SALU write missing */
/* s[4] is only read here, never written, so no wait is expected. */
//>> p_unit_test 0
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
/* no hazard: SGPR never read by VALU */
/* s[16] does not appear in the preamble above. */
//! p_unit_test 1
//! s1: %0:s[16] = s_mov_b32 0
//! s1: %0:s[64] = s_mov_b32 %0:s[16]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(16), s1), Operand::zero(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(16), s1));
/* basic case: SALU read */
/* SALU write of s[4] (read by a VALU in the preamble) immediately followed by
 * a SALU read of it: a wait is expected between the two. */
//! p_unit_test 2
//! s1: %0:s[4] = s_mov_b32 0
//! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
/* basic case again: VALU reads never expire */
/* Same sequence as sub-case 2, repeated without a new VALU read in between;
 * the wait is still expected. */
//! p_unit_test 3
//! s1: %0:s[4] = s_mov_b32 0
//! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
/* sa_sdst(0) resolves the hazard */
/* Two consecutive reads of s[4] after the write: only the first one is
 * expected to be preceded by a wait. */
//! p_unit_test 4
//! s1: %0:s[4] = s_mov_b32 0
//! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
/* Here the wait is already part of the input (immediate 0xfffe, which the
 * expected output prints as sa_sdst(0)); exactly one wait is expected, i.e. no
 * additional one is added. */
//! p_unit_test 5
//! s1: %0:s[4] = s_mov_b32 0
//! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
bld.sopp(aco_opcode::s_waitcnt_depctr, 0xfffe);
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
/* basic case: VALU read */
/* Like sub-case 2, but the instruction reading s[4] after the write is a VALU
 * instruction. */
//! p_unit_test 6
//! s1: %0:s[4] = s_mov_b32 0
//! s_waitcnt_depctr sa_sdst(0)
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
/* the SALU write is in the same SGPR pair as the VALU read */
/* The preamble reads s[7], not s[6]; writing s[6] is nevertheless expected to
 * require the wait. */
//! p_unit_test 7
//! s1: %0:s[6] = s_mov_b32 0
//! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[64] = s_mov_b32 %0:s[6]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(6), s1), Operand::zero(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(6), s1));
/* no hazard: these registers are not problematic */
/* Sub-cases 8-11: null, exec_lo, m0 and scc were all read by a VALU in the
 * preamble, yet no wait is expected after writing them. */
//! p_unit_test 8
//! s1: %0:null = s_mov_b32 0
//! s1: %0:s[64] = s_mov_b32 %0:null
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8));
bld.sop1(aco_opcode::s_mov_b32, Definition(sgpr_null, s1), Operand::zero(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(sgpr_null, s1));
//! p_unit_test 9
//! s1: %0:exec_lo = s_mov_b32 0
//! s1: %0:s[64] = s_mov_b32 %0:exec_lo
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9));
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::zero(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(exec_lo, s1));
//! p_unit_test 10
//! s1: %0:m0 = s_mov_b32 0
//! s1: %0:s[64] = s_mov_b32 %0:m0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10));
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(m0, s1));
/* scc is written through a compare (s_cmp_lg_i32) instead of s_mov_b32. */
//! p_unit_test 11
//! s1: %0:scc = s_cmp_lg_i32 0, 0
//! s1: %0:s[64] = s_mov_b32 %0:scc
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11));
bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand::zero(4), Operand::zero(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(scc, s1));
/* 11 SALU between the write and a VALU read expire the hazard */
/* Sub-case 12 has 11 unrelated SALU writes in between: no wait expected. */
//! p_unit_test 12
//! s1: %0:s[4] = s_mov_b32 0
//; for i in range(11): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
for (unsigned i = 0; i < 11; i++)
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
/* Sub-case 13 is the boundary: with only 10 SALU in between, the wait is
 * still expected before the VALU read. */
//! p_unit_test 13
//! s1: %0:s[4] = s_mov_b32 0
//; for i in range(10): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
//! s_waitcnt_depctr sa_sdst(0)
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
for (unsigned i = 0; i < 10; i++)
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
/* 10 SALU between the write and a SALU read expire the hazard */
/* Sub-case 14 has 10 unrelated SALU writes in between: no wait expected. */
//! p_unit_test 14
//! s1: %0:s[4] = s_mov_b32 0
//; for i in range(10): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
for (unsigned i = 0; i < 10; i++)
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
/* Sub-case 15 is the boundary: with only 9 SALU in between, the wait is still
 * expected before the SALU read. */
//! p_unit_test 15
//! s1: %0:s[4] = s_mov_b32 0
//; for i in range(9): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
//! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
for (unsigned i = 0; i < 9; i++)
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
/* SOPP in-between the write and the read do not count */
/* 9 s_mov_b32 plus one s_nop: if the s_nop counted, this would match
 * sub-case 14 (10 in between, no wait); instead the wait is expected, as in
 * sub-case 15. */
//! p_unit_test 16
//! s1: %0:s[4] = s_mov_b32 0
//; for i in range(9): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
//! s_nop
//! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
for (unsigned i = 0; i < 9; i++)
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
bld.sopp(aco_opcode::s_nop, 0);
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
/* Hands the built program to the insert-NOPs test helper; presumably this runs
 * the pass and emits the output the directives above are matched against. */
finish_insert_nops_test();
END_TEST
/* Tests the VALU-read-SGPR hazard handling on GFX12 for a shader whose stage
 * is raytracing_cs: a SALU write of s[4] followed by a SALU read of it is
 * expected to get an "s_waitcnt_depctr sa_sdst(0)" even though this test
 * itself never emits a VALU read of s[4].
 *
 * NOTE(review): the "//>>" and "//!" comment lines look like expected-output
 * directives consumed by the test harness - presumably they must stay in sync
 * with the builder calls below; confirm against the test framework before
 * editing them.
 */
BEGIN_TEST(insert_nops.valu_read_sgpr.previous_part)
/* Nothing to test if a GFX12 compute-shader program cannot be set up. */
if (!setup_cs(NULL, GFX12))
return;
/* Raytracing shaders have a prolog and may also be split into several parts. */
/* Override the stage after setup so the program is treated as a raytracing
 * shader for the rest of the test. */
program->stage = raytracing_cs;
/* Despite the SGPR never being read by a VALU in this shader, a sa_sdst(0) is needed. */
/* Unlike the "basic" test, there is no preamble of VALU reads here: the only
 * instructions are the marker, the SALU write and the SALU read. */
//>> p_unit_test 0
//! s1: %0:s[4] = s_mov_b32 0
//! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[64] = s_mov_b32 %0:s[4]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand(PhysReg(4), s1));
/* Hands the built program to the insert-NOPs test helper; presumably this runs
 * the pass and emits the output the directives above are matched against. */
finish_insert_nops_test();
END_TEST
BEGIN_TEST(insert_nops.setpc_gfx6)
if (!setup_cs(NULL, GFX6))
return;
@@ -1894,5 +2084,42 @@ BEGIN_TEST(insert_nops.setpc_gfx12)
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1));
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
/* VALUReadSGPRHazard */
//! p_unit_test 4
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
//! s1: %0:s[4] = s_mov_b32 0
//! s_waitcnt_depctr va_vdst(0) sa_sdst(0)
//! s_setpc_b64 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
//! p_unit_test 5
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
//! s1: %0:s[4] = s_mov_b32 0
//; for i in range(10): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
//! s_waitcnt_depctr va_vdst(0)
//! s_setpc_b64 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
for (unsigned i = 0; i < 10; i++) /* the s_setpc_b64 counts */
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
//! p_unit_test 6
//! v1: %0:v[0] = v_mov_b32 %0:s[4]
//! s1: %0:s[4] = s_mov_b32 0
//; for i in range(9): insert_pattern('s1: %0:s[64] = s_mov_b32 0')
//! s_waitcnt_depctr va_vdst(0) sa_sdst(0)
//! s_setpc_b64 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(256), v1), Operand(PhysReg(4), s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(4), s1), Operand::zero(4));
for (unsigned i = 0; i < 9; i++)
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(64), s1), Operand::zero(4));
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
finish_insert_nops_test(true);
END_TEST