diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index 7b2dece0ce1..b90ac066387 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -266,9 +266,12 @@ print_scope(sync_scope scope, FILE* output, const char* prefix = "scope") static void print_sync(memory_sync_info sync, FILE* output) { - print_storage(sync.storage, output); - print_semantics(sync.semantics, output); - print_scope(sync.scope, output); + if (sync.storage) + print_storage(sync.storage, output); + if (sync.semantics) + print_semantics(sync.semantics, output); + if (sync.scope != scope_invocation) + print_scope(sync.scope, output); } static void diff --git a/src/amd/compiler/tests/test_insert_nops.cpp b/src/amd/compiler/tests/test_insert_nops.cpp index 7587246801e..6c1b00f0996 100644 --- a/src/amd/compiler/tests/test_insert_nops.cpp +++ b/src/amd/compiler/tests/test_insert_nops.cpp @@ -55,34 +55,34 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug) /* no nop needed because offset&6==0 */ //>> p_unit_test 0 - //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d storage: semantics: scope:invocation - //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:8 offen storage: semantics: scope:invocation + //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d + //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:8 offen bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); create_mimg(true, 6, 4); create_mubuf(8); /* nop needed */ //! p_unit_test 1 - //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d storage: semantics: scope:invocation + //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d //! s_nop - //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation + //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); create_mimg(true, 6, 4); create_mubuf(4); /* no nop needed because the MIMG is not NSA */ //! p_unit_test 2 - //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[1], %0:v[2], %0:v[3], %0:v[4], %0:v[5] 2d storage: semantics: scope:invocation - //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation + //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[1], %0:v[2], %0:v[3], %0:v[4], %0:v[5] 2d + //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); create_mimg(false, 6, 2); create_mubuf(4); /* no nop needed because there's already an instruction in-between */ //! p_unit_test 3 - //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d storage: semantics: scope:invocation + //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d //! v_nop - //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation + //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); create_mimg(true, 6, 4); bld.vop1(aco_opcode::v_nop); @@ -90,19 +90,19 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug) /* no nop needed because the NSA instruction is under 4 dwords */ //! p_unit_test 4 - //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2] 2d storage: semantics: scope:invocation - //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation + //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2] 2d + //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); create_mimg(true, 2, 3); create_mubuf(4); /* NSA instruction and MUBUF/MTBUF in a different block */ //! p_unit_test 5 - //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d storage: semantics: scope:invocation + //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d //! BB1 //! /* logical preds: / linear preds: BB0, / kind: uniform, */ //! s_nop - //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation + //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u)); create_mimg(true, 6, 4); bld.reset(program->create_and_insert_block()); @@ -121,7 +121,7 @@ BEGIN_TEST(insert_nops.writelane_to_nsa_bug) //>> p_unit_test 0 //! v1: %0:v[255] = v_writelane_b32_e64 0, 0, %0:v[255] //! s_nop - //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2] 2d storage: semantics: scope:invocation + //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2] 2d bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); bld.writelane(Definition(PhysReg(511), v1), Operand::zero(), Operand::zero(), Operand(PhysReg(511), v1)); @@ -130,7 +130,7 @@ BEGIN_TEST(insert_nops.writelane_to_nsa_bug) /* no nop needed because the MIMG is not NSA */ //! p_unit_test 1 //! v1: %0:v[255] = v_writelane_b32_e64 0, 0, %0:v[255] - //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[1] 2d storage: semantics: scope:invocation + //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[1] 2d bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); bld.writelane(Definition(PhysReg(511), v1), Operand::zero(), Operand::zero(), Operand(PhysReg(511), v1)); @@ -140,7 +140,7 @@ BEGIN_TEST(insert_nops.writelane_to_nsa_bug) //! p_unit_test 2 //! v1: %0:v[255] = v_writelane_b32_e64 0, 0, %0:v[255] //! v_nop - //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2] 2d storage: semantics: scope:invocation + //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2] 2d bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); bld.writelane(Definition(PhysReg(511), v1), Operand::zero(), Operand::zero(), Operand(PhysReg(511), v1)); @@ -153,7 +153,7 @@ BEGIN_TEST(insert_nops.writelane_to_nsa_bug) //! BB1 //! /* logical preds: / linear preds: BB0, / kind: uniform, */ //! s_nop - //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2] 2d storage: semantics: scope:invocation + //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2] 2d bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); bld.writelane(Definition(PhysReg(511), v1), Operand::zero(), Operand::zero(), Operand(PhysReg(511), v1)); diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp index 593d401b67a..01593155210 100644 --- a/src/amd/compiler/tests/test_isel.cpp +++ b/src/amd/compiler/tests/test_isel.cpp @@ -70,7 +70,7 @@ BEGIN_TEST(isel.compute.simple) }; void main() { //>> v1: %data = p_parallelcopy 42 - //buffer_store_dword %_, v1: undef, 0, %data disable_wqm storage:buffer semantics: scope:invocation + //! buffer_store_dword (kill)%_, v1: undef, 0, (kill)%data glc disable_wqm storage:buffer res = 42; } ); @@ -158,13 +158,13 @@ BEGIN_TEST(isel.sparse.clause) //; funcs['sample_res'] = lambda _: 'v#_' //; funcs['sample_coords'] = lambda _: '[v#_, v#_, v#_, v#_]' //>> v5: (noCSE)%zero0 = p_create_vector 0, 0, 0, 0, 0 - //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero0, (kill)%_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation + //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero0, (kill)%_, %_, %_ dmask:xyzw 2d tfe //>> v5: (noCSE)%zero1 = p_create_vector 0, 0, 0, 0, 0 - //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero1, (kill)%_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation + //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero1, (kill)%_, %_, %_ dmask:xyzw 2d tfe //>> v5: (noCSE)%zero2 = p_create_vector 0, 0, 0, 0, 0 - //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero2, (kill)%_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation + //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero2, (kill)%_, %_, %_ dmask:xyzw 2d tfe //>> v5: (noCSE)%zero3 = p_create_vector 0, 0, 0, 0, 0 - //>> v5: %_ = image_sample_lz_o (kill)%_, (kill)%_, (kill)%zero3, (kill)%_, (kill)%_, (kill)%_ dmask:xyzw 2d tfe storage: semantics: scope:invocation + //>> v5: %_ = image_sample_lz_o (kill)%_, (kill)%_, (kill)%zero3, (kill)%_, (kill)%_, (kill)%_ dmask:xyzw 2d tfe //>> s_clause 0x3 //! image_sample_lz_o @sample_res, @sample_coords, @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe //! image_sample_lz_o @sample_res, @sample_coords, @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp index d3d3fb09bc2..1f4840995da 100644 --- a/src/amd/compiler/tests/test_optimizer_postRA.cpp +++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp @@ -482,7 +482,7 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf) //! /* logical preds: BB0, / linear preds: BB0, / kind: */ //! p_logical_start - //! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen storage: semantics: scope:invocation + //! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true); //! p_logical_end @@ -561,7 +561,7 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten) //! v1: %addr:v[0] = p_parallelcopy %f:s[2] Temp addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), f); - //! buffer_store_dword %addr:v[0], 0, %d:v[3], 0 offen storage: semantics: scope:invocation + //! buffer_store_dword %addr:v[0], 0, %d:v[3], 0 offen bld.mubuf(aco_opcode::buffer_store_dword, Operand(addr, a.physReg()), Operand::zero(), d, Operand::zero(), 0, true); //! p_logical_end @@ -633,7 +633,7 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf) //! /* logical preds: BB0, / linear preds: BB0, / kind: */ //! p_logical_start - //! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen storage: semantics: scope:invocation + //! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true); //! p_logical_end @@ -716,7 +716,7 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten) //! s1: %ovrwr:s[3] = p_parallelcopy %f:s[4] Temp s_addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(s1, reg_s3), f); - //! buffer_store_dword %c:v[2], %ovrwr:s[3], %d:v[3], 0 offen storage: semantics: scope:invocation + //! buffer_store_dword %c:v[2], %ovrwr:s[3], %d:v[3], 0 offen bld.mubuf(aco_opcode::buffer_store_dword, c, Operand(s_addr, reg_s3), d, Operand::zero(), 0, true); //! p_logical_end