aco/tests: add GFX12 assembler tests

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Acked-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29162>
This commit is contained in:
Rhys Perry
2024-02-23 12:27:38 +00:00
committed by Marge Bot
parent e1e5bc0dd0
commit 5e58e32832
2 changed files with 722 additions and 532 deletions

View File

@@ -557,7 +557,7 @@ formats = [("pseudo", [Format.PSEUDO], list(itertools.product(range(5), range(6)
("smem", [Format.SMEM], [(0, 4), (0, 3), (1, 0), (1, 3), (1, 2), (1, 1), (0, 0)]),
("ds", [Format.DS], [(1, 1), (1, 2), (1, 3), (0, 3), (0, 4)]),
("ldsdir", [Format.LDSDIR], [(1, 1)]),
("mubuf", [Format.MUBUF], [(0, 4), (1, 3)]),
("mubuf", [Format.MUBUF], [(0, 4), (1, 3), (1, 4)]),
("mtbuf", [Format.MTBUF], [(0, 4), (1, 3)]),
("mimg", [Format.MIMG], itertools.product([0, 1], [3, 4, 5, 6, 7])),
("exp", [Format.EXP], [(0, 4), (0, 5)]),
@@ -594,9 +594,9 @@ formats = [("pseudo", [Format.PSEUDO], list(itertools.product(range(5), range(6)
("vop1_e64_dpp8", [Format.VOP1, Format.VOP3, Format.DPP8], itertools.product([1], [1])),
("vop2_e64_dpp8", [Format.VOP2, Format.VOP3, Format.DPP8], itertools.product([1, 2], [2, 3])),
("vopc_e64_dpp8", [Format.VOPC, Format.VOP3, Format.DPP8], itertools.product([1, 2], [2])),
("flat", [Format.FLAT], [(0, 3), (1, 2)]),
("global", [Format.GLOBAL], [(0, 3), (1, 2)]),
("scratch", [Format.SCRATCH], [(0, 3), (1, 2)])]
("flat", [Format.FLAT], [(0, 3), (1, 2), (1, 3)]),
("global", [Format.GLOBAL], [(0, 3), (1, 2), (1, 3)]),
("scratch", [Format.SCRATCH], [(0, 3), (1, 2), (1, 3)])]
formats = [(f if len(f) == 5 else f + ('',)) for f in formats]
%>\\
% for name, formats, shapes, extra_field_setup in formats:

View File

@@ -10,6 +10,17 @@
using namespace aco;
static std::vector<amd_gfx_level>
filter_gfx_levels(std::vector<amd_gfx_level> src)
{
std::vector<amd_gfx_level> res;
for (amd_gfx_level gfx : src) {
if (gfx < GFX12 || LLVM_VERSION_MAJOR >= 19)
res.push_back(gfx);
}
return res;
}
BEGIN_TEST(assembler.s_memtime)
for (unsigned i = GFX6; i <= GFX10; i++) {
if (!setup_cs(NULL, (amd_gfx_level)i))
@@ -365,9 +376,10 @@ BEGIN_TEST(assembler.vopc_sdwa)
}
END_TEST
BEGIN_TEST(assembler.gfx11.smem)
if (!setup_cs(NULL, GFX11))
return;
BEGIN_TEST(assembler.smem)
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
if (!setup_cs(NULL, gfx))
continue;
Definition dst = bld.def(s1);
dst.setFixed(PhysReg(4));
@@ -381,30 +393,36 @@ BEGIN_TEST(assembler.gfx11.smem)
Operand op_s4(bld.tmp(s4));
op_s4.setFixed(PhysReg(32));
//>> s_dcache_inv ; f4840000 f8000000
//~gfx11>> s_dcache_inv ; f4840000 f8000000
//~gfx12>> s_dcache_inv ; f4042000 f8000000
bld.smem(aco_opcode::s_dcache_inv);
//! s_load_b32 s4, s[16:17], 0x2a ; f4000108 f800002a
bld.smem(aco_opcode::s_load_dword, dst, op_s2, Operand::c32(42));
//! s_load_b32 s4, s[16:17], s8 ; f4000108 10000000
//~gfx11! s_load_b32 s4, s[16:17], s8 ; f4000108 10000000
//~gfx12! s_load_b32 s4, s[16:17], s8 offset:0x0 ; f4000108 10000000
bld.smem(aco_opcode::s_load_dword, dst, op_s2, op_s1);
//! s_load_b32 s4, s[16:17], s8 offset:0x2a ; f4000108 1000002a
bld.smem(aco_opcode::s_load_dword, dst, op_s2, Operand::c32(42), op_s1);
//! s_buffer_load_b32 s4, s[32:35], s8 glc ; f4204110 10000000
//~gfx11! s_buffer_load_b32 s4, s[32:35], s8 glc ; f4204110 10000000
//~gfx12! s_buffer_load_b32 s4, s[32:35], s8 offset:0x0 scope:SCOPE_SYS ; f4620110 10000000
bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().glc = true;
//! s_buffer_load_b32 s4, s[32:35], s8 dlc ; f4202110 10000000
//~gfx11! s_buffer_load_b32 s4, s[32:35], s8 dlc ; f4202110 10000000
//~gfx12! (then repeated 1 times)
bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().dlc = true;
finish_assembler_test();
}
END_TEST
BEGIN_TEST(assembler.gfx11.mubuf)
if (!setup_cs(NULL, GFX11))
return;
BEGIN_TEST(assembler.mubuf)
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
if (!setup_cs(NULL, gfx))
continue;
Definition dst = bld.def(v1);
dst.setFixed(PhysReg(256 + 42));
@@ -428,94 +446,128 @@ BEGIN_TEST(assembler.gfx11.mubuf)
fprintf(output, "llvm_version: %u\n", LLVM_VERSION_MAJOR);
/* Addressing */
//>> buffer_load_b32 v42, off, s[32:35], s30 ; e0500000 1e082a80
//~gfx11>> buffer_load_b32 v42, off, s[32:35], s30 ; e0500000 1e082a80
//~gfx12>> buffer_load_b32 v42, off, s[32:35], s30 ; c405001e 0080402a 00000000
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 0, false);
//! buffer_load_b32 v42, off, s[32:35], 42 ; e0500000 aa082a80
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::c32(42), 0, false);
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 ; e0500000 80082a80
//~gfx12! buffer_load_b32 v42, off, s[32:35], null ; c405007c 0080402a 00000000
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false);
//! buffer_load_b32 v42, v10, s[32:35], s30 offen ; e0500000 1e482a0a
//~gfx11! buffer_load_b32 v42, off, s[32:35], 42 ; e0500000 aa082a80
if (gfx == GFX11)
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::c32(42), 0,
false);
//~gfx11! buffer_load_b32 v42, v10, s[32:35], s30 offen ; e0500000 1e482a0a
//~gfx12! buffer_load_b32 v42, v10, s[32:35], s30 offen ; c405001e 4080402a 0000000a
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, true);
//! buffer_load_b32 v42, v10, s[32:35], s30 idxen ; e0500000 1e882a0a
//~gfx11! buffer_load_b32 v42, v10, s[32:35], s30 idxen ; e0500000 1e882a0a
//~gfx12! buffer_load_b32 v42, v10, s[32:35], s30 idxen ; c405001e 8080402a 0000000a
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false)->mubuf().idxen =
true;
//! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen ; e0500000 1ec82a14
//~gfx11! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen ; e0500000 1ec82a14
//~gfx12! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen ; c405001e c080402a 00000014
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true)->mubuf().idxen =
true;
//! buffer_load_b32 v42, off, s[32:35], s30 offset:84 ; e0500054 1e082a80
//~gfx11! buffer_load_b32 v42, off, s[32:35], s30 offset:84 ; e0500054 1e082a80
//~gfx12! buffer_load_b32 v42, off, s[32:35], s30 offset:84 ; c405001e 0080402a 00005400
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false);
/* Various flags */
//! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80
//~gfx12! buffer_load_b32 v42, off, s[32:35], null scope:SCOPE_SYS ; c405007c 008c402a 00000000
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.glc = true;
//! buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80
//~gfx12! (then repeated 2 times)
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.dlc = true;
//! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.slc = true;
//; if llvm_ver >= 16:
//; if llvm_ver >= 16 and variant == 'gfx11':
//; insert_pattern('buffer_load_b32 v[42:43], off, s[32:35], 0 tfe ; e0500000 80282a80')
//; else:
//; elif variant == 'gfx11':
//; insert_pattern('buffer_load_b32 v42, off, s[32:35], 0 tfe ; e0500000 80282a80')
//~gfx12! buffer_load_b32 v[42:43], off, s[32:35], null tfe ; c445007c 0080402a 00000000
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.tfe = true;
/* LDS */
//! buffer_load_lds_b32 off, s[32:35], 0 ; e0c40000 80080080
bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
if (gfx == GFX11) {
//~gfx11! buffer_load_lds_b32 off, s[32:35], 0 ; e0c40000 80080080
bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0,
false)
->mubuf()
.lds = true;
//! buffer_load_lds_i8 off, s[32:35], 0 ; e0b80000 80080080
bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
//~gfx11! buffer_load_lds_i8 off, s[32:35], 0 ; e0b80000 80080080
bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0,
false)
->mubuf()
.lds = true;
//! buffer_load_lds_i16 off, s[32:35], 0 ; e0c00000 80080080
bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
//~gfx11! buffer_load_lds_i16 off, s[32:35], 0 ; e0c00000 80080080
bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0,
false)
->mubuf()
.lds = true;
//! buffer_load_lds_u8 off, s[32:35], 0 ; e0b40000 80080080
bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
//~gfx11! buffer_load_lds_u8 off, s[32:35], 0 ; e0b40000 80080080
bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0,
false)
->mubuf()
.lds = true;
//! buffer_load_lds_u16 off, s[32:35], 0 ; e0bc0000 80080080
bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
//~gfx11! buffer_load_lds_u16 off, s[32:35], 0 ; e0bc0000 80080080
bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0,
false)
->mubuf()
.lds = true;
//! buffer_load_lds_format_x off, s[32:35], 0 ; e0c80000 80080080
bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
//~gfx11! buffer_load_lds_format_x off, s[32:35], 0 ; e0c80000 80080080
bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0,
false)
->mubuf()
.lds = true;
}
/* Stores */
//! buffer_store_b32 v10, off, s[32:35], s30 ; e0680000 1e080a80
//~gfx11! buffer_store_b32 v10, off, s[32:35], s30 ; e0680000 1e080a80
//~gfx12! buffer_store_b32 v10, off, s[32:35], s30 scope:SCOPE_SYS ; c406801e 008c400a 00000000
bld.mubuf(aco_opcode::buffer_store_dword, op_s4, Operand(v1), op_s1, op_v1, 0, false);
//! buffer_store_b64 v[20:21], v10, s[32:35], s30 offen ; e06c0000 1e48140a
//~gfx11! buffer_store_b64 v[20:21], v10, s[32:35], s30 offen ; e06c0000 1e48140a
//~gfx12! buffer_store_b64 v[20:21], v10, s[32:35], s30 offen scope:SCOPE_SYS ; c406c01e 408c4014 0000000a
bld.mubuf(aco_opcode::buffer_store_dwordx2, op_s4, op_v1, op_s1, op_v2, 0, true);
/* Atomic with return */
//~gfx11! buffer_atomic_add_u32 v10, off, s[32:35], 0 glc ; e0d44000 80080a80
//~gfx12! buffer_atomic_add_u32 v10, off, s[32:35], null th:TH_ATOMIC_RETURN ; c40d407c 0090400a 00000000
bld.mubuf(aco_opcode::buffer_atomic_add, Definition(op_v1.physReg(), v1), op_s4, Operand(v1),
Operand::zero(), op_v1, 0, false)
->mubuf()
.glc = true;
finish_assembler_test();
}
END_TEST
BEGIN_TEST(assembler.gfx11.mtbuf)
if (!setup_cs(NULL, GFX11))
return;
BEGIN_TEST(assembler.mtbuf)
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
if (!setup_cs(NULL, gfx))
continue;
Definition dst = bld.def(v1);
dst.setFixed(PhysReg(256 + 42));
@@ -539,73 +591,92 @@ BEGIN_TEST(assembler.gfx11.mtbuf)
fprintf(output, "llvm_version: %u\n", LLVM_VERSION_MAJOR);
/* Addressing */
//>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9900000 1e082a80
//~gfx11>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9900000 1e082a80
//~gfx12>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; c420001e 1900402a 00000080
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 0,
false);
//! tbuffer_load_format_x v42, off, s[32:35], 42 format:[BUF_FMT_32_32_FLOAT] ; e9900000 aa082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42), dfmt,
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80082a80
//~gfx12! tbuffer_load_format_x v42, off, s[32:35], null format:[BUF_FMT_32_32_FLOAT] ; c420007c 1900402a 00000080
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false);
//! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9900000 1e482a0a
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 42 format:[BUF_FMT_32_32_FLOAT] ; e9900000 aa082a80
if (gfx == GFX11)
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42),
dfmt, nfmt, 0, false);
//~gfx11! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9900000 1e482a0a
//~gfx12! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; c420001e 5900402a 0000000a
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, true);
//! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; e9900000 1e882a0a
//~gfx11! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; e9900000 1e882a0a
//~gfx12! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; c420001e 9900402a 0000000a
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false)
->mtbuf()
.idxen = true;
//! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; e9900000 1ec82a14
//~gfx11! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; e9900000 1ec82a14
//~gfx12! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; c420001e d900402a 00000014
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true)
->mtbuf()
.idxen = true;
//! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; e9900054 1e082a80
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; e9900054 1e082a80
//~gfx12! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; c420001e 1900402a 00005480
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84,
false);
/* Various flags */
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80
//~gfx12! tbuffer_load_format_x v42, off, s[32:35], null format:[BUF_FMT_32_32_FLOAT] scope:SCOPE_SYS ; c420007c 190c402a 00000080
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.glc = true;
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80
//~gfx12! (then repeated 2 times)
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.dlc = true;
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.slc = true;
//; if llvm_ver >= 16:
//; if llvm_ver >= 16 and variant == 'gfx11':
//; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80282a80')
//; else:
//; elif variant == 'gfx11':
//; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] tfe ; e9900000 80282a80')
//~gfx12! tbuffer_load_format_x v42, off, s[32:35], null format:[BUF_FMT_32_32_FLOAT] ; c460007c 1900402a 00000080
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.tfe = true;
/* Stores */
//! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9920000 1e080a80
//~gfx11! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9920000 1e080a80
//~gfx12! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] scope:SCOPE_SYS ; c421001e 190c400a 00000080
bld.mtbuf(aco_opcode::tbuffer_store_format_x, op_s4, Operand(v1), op_s1, op_v1, dfmt, nfmt, 0,
false);
//! tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9928000 1e48140a
bld.mtbuf(aco_opcode::tbuffer_store_format_xy, op_s4, op_v1, op_s1, op_v2, dfmt, nfmt, 0, true);
//~gfx11! tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9928000 1e48140a
//~gfx12! tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen scope:SCOPE_SYS ; c421401e 590c4014 0000000a
bld.mtbuf(aco_opcode::tbuffer_store_format_xy, op_s4, op_v1, op_s1, op_v2, dfmt, nfmt, 0,
true);
finish_assembler_test();
}
END_TEST
BEGIN_TEST(assembler.gfx11.mimg)
if (!setup_cs(NULL, GFX11))
return;
BEGIN_TEST(assembler.mimg)
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
if (!setup_cs(NULL, gfx))
continue;
Definition dst_v1 = bld.def(v1);
dst_v1.setFixed(PhysReg(256 + 42));
@@ -628,64 +699,125 @@ BEGIN_TEST(assembler.gfx11.mimg)
Operand op_v4(bld.tmp(v4));
op_v4.setFixed(PhysReg(256 + 30));
//>> image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D ; f06c0f00 2010540a
//~gfx11>> image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D ; f06c0f00 2010540a
//~gfx12>> image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D ; e7c6c000 10008054 0000000a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1);
//! image_sample v[84:87], v[20:21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f04 20105414
//~gfx11! image_sample v[84:87], v[20:21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f04 20105414
//~gfx12! image_sample v[84:87], [v20, v21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; e7c6c001 10008054 00001514
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2)->mimg().dim =
ac_image_2d;
//! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; f06c0100 20102a0a
bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1)->mimg().dmask = 0x1;
//~gfx11! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; f06c0100 20102a0a
//~gfx12! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; e446c000 1000802a 0000000a
bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1)->mimg().dmask =
0x1;
/* Various flags */
//! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D dlc ; f06c2f00 2010540a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().dlc = true;
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D dlc ; f06c2f00 2010540a
//~gfx12! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D scope:SCOPE_SYS ; e7c6c000 100c8054 0000000a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().dlc =
true;
//! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; f06c4f00 2010540a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().glc = true;
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; f06c4f00 2010540a
//~gfx12! (then repeated 2 times)
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().glc =
true;
//! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; f06c1f00 2010540a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().slc = true;
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; f06c1f00 2010540a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().slc =
true;
//! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; f06c0f00 2030540a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().tfe = true;
//~gfx11! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; f06c0f00 2030540a
//~gfx12! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; e7c6c008 10008054 0000000a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().tfe =
true;
//! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; f06c0f00 2050540a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().lwe = true;
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; f06c0f00 2050540a
//~gfx12! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; e7c6c000 10008154 0000000a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().lwe =
true;
//! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D r128 ; f06c8f00 2010540a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().r128 = true;
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D r128 ; f06c8f00 2010540a
//~gfx12! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D r128 ; e7c6c010 10008054 0000000a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().r128 =
true;
//! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; f06d0f00 2010540a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().a16 = true;
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; f06d0f00 2010540a
//~gfx12! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; e7c6c040 10008054 0000000a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().a16 =
true;
//! image_sample v[84:85], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D d16 ; f06e0f00 2010540a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().d16 = true;
//~gfx11! image_sample v[84:85], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D d16 ; f06e0f00 2010540a
//~gfx12! image_sample v[84:85], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D d16 ; e7c6c020 10008054 0000000a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().d16 =
true;
/* NSA */
//! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f05 2010540a 00000028
//~gfx11! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f05 2010540a 00000028
//~gfx12! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; e7c6c001 10008054 0000280a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1,
Operand(bld.tmp(v1), PhysReg(256 + 40)))
->mimg()
.dim = ac_image_2d;
//~gfx11! image_bvh_intersect_ray v[84:87], [v40, v42, v[44:46], v[48:50], v[52:54]], s[32:35] ; f0648f81 00085428 34302c2a
//~gfx12! image_bvh_intersect_ray v[84:87], [v40, v42, v[44:46], v[48:50], v[52:54]], s[32:35] ; d3c64010 34004054 302c2a28
aco_ptr<Instruction> instr{
create_instruction(aco_opcode::image_bvh_intersect_ray, Format::MIMG, 8, 1)};
instr->definitions[0] = dst_v4;
instr->operands[0] = op_s4;
instr->operands[1] = Operand(s4);
instr->operands[2] = Operand(v1);
instr->operands[3] = Operand(PhysReg(256 + 40), v1); /* node */
instr->operands[4] = Operand(PhysReg(256 + 42), v1); /* tmax */
instr->operands[5] = Operand(PhysReg(256 + 44), v3); /* origin */
instr->operands[6] = Operand(PhysReg(256 + 48), v3); /* dir */
instr->operands[7] = Operand(PhysReg(256 + 52), v3); /* inv dir */
instr->mimg().dmask = 0xf;
instr->mimg().unrm = true;
instr->mimg().r128 = true;
bld.insert(std::move(instr));
/* Stores */
//! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D ; f0180f00 00101e0a
//~gfx11! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D ; f0180f00 00101e0a
//~gfx12! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D scope:SCOPE_SYS ; d3c18000 000c801e 0000000a
bld.mimg(aco_opcode::image_store, op_s8, Operand(s4), op_v4, op_v1);
//! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0300f04 00100a14
//~gfx11! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0300f04 00100a14
//~gfx12! image_atomic_add_uint v10, [v20, v21, v0, v0], s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; d3c30001 0000800a 00001514
bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4),
op_v1, op_v2)
->mimg()
.dim = ac_image_2d;
/* Atomic with return */
//~gfx11! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D glc ; f0304f04 00100a14
//~gfx12! image_atomic_add_uint v10, [v20, v21, v0, v0], s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN ; d3c30001 0010800a 00001514
bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4),
op_v1, op_v2, 0xf, false, false, false, true)
->mimg()
.dim = ac_image_2d;
//~gfx11! image_load v[84:87], v[20:21], s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0000f04 00105414
//~gfx12! image_load v[84:87], [v20, v21], s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; d3c00001 00008054 00001514
bld.mimg(aco_opcode::image_load, dst_v4, op_s8, Operand(s4), Operand(v1), op_v2)->mimg().dim =
ac_image_2d;
//~gfx11! image_msaa_load v[84:87], v[30:33], s[64:71] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; f060011c 0010541e
//~gfx12! image_msaa_load v[84:87], [v30, v31, v32, v33], s[64:71] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; e4460007 00008054 21201f1e
bld.mimg(aco_opcode::image_msaa_load, dst_v4, op_s8, Operand(s4), Operand(v1), op_v4, 0x1)
->mimg()
.dim = ac_image_2darraymsaa;
finish_assembler_test();
}
END_TEST
BEGIN_TEST(assembler.gfx11.flat)
if (!setup_cs(NULL, GFX11))
return;
BEGIN_TEST(assembler.flat)
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
if (!setup_cs(NULL, gfx))
continue;
Definition dst_v1 = bld.def(v1);
dst_v1.setFixed(PhysReg(256 + 42));
@@ -703,53 +835,73 @@ BEGIN_TEST(assembler.gfx11.flat)
op_v2.setFixed(PhysReg(256 + 20));
/* Addressing */
//>> flat_load_b32 v42, v[20:21] ; dc500000 2a7c0014
//~gfx11>> flat_load_b32 v42, v[20:21] ; dc500000 2a7c0014
//~gfx12>> flat_load_b32 v42, v[20:21] ; ec05007c 0000002a 00000014
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1));
//! global_load_b32 v42, v[20:21], off ; dc520000 2a7c0014
//~gfx11! global_load_b32 v42, v[20:21], off ; dc520000 2a7c0014
//~gfx12! global_load_b32 v42, v[20:21], off ; ee05007c 0000002a 00000014
bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1));
//! global_load_b32 v42, v10, s[64:65] ; dc520000 2a40000a
//~gfx11! global_load_b32 v42, v10, s[64:65] ; dc520000 2a40000a
//~gfx12! global_load_b32 v42, v10, s[64:65] ; ee050040 0000002a 0000000a
bld.global(aco_opcode::global_load_dword, dst_v1, op_v1, op_s2);
//! scratch_load_b32 v42, v10, off ; dc510000 2afc000a
//~gfx11! scratch_load_b32 v42, v10, off ; dc510000 2afc000a
//~gfx12! scratch_load_b32 v42, v10, off ; ed05007c 0002002a 0000000a
bld.scratch(aco_opcode::scratch_load_dword, dst_v1, op_v1, Operand(s1));
//! scratch_load_b32 v42, off, s32 ; dc510000 2a200080
//~gfx11! scratch_load_b32 v42, off, s32 ; dc510000 2a200080
//~gfx12! scratch_load_b32 v42, off, s32 ; ed050020 0000002a 00000000
bld.scratch(aco_opcode::scratch_load_dword, dst_v1, Operand(v1), op_s1);
//! scratch_load_b32 v42, v10, s32 ; dc510000 2aa0000a
//~gfx11! scratch_load_b32 v42, v10, s32 ; dc510000 2aa0000a
//~gfx12! scratch_load_b32 v42, v10, s32 ; ed050020 0002002a 0000000a
bld.scratch(aco_opcode::scratch_load_dword, dst_v1, op_v1, op_s1);
//! scratch_load_b32 v42, off, off ; dc510000 2a7c0080
//~gfx11! scratch_load_b32 v42, off, off ; dc510000 2a7c0080
//~gfx12! scratch_load_b32 v42, off, off ; ed05007c 0000002a 00000000
bld.scratch(aco_opcode::scratch_load_dword, dst_v1, Operand(v1), Operand(s1));
//! global_load_b32 v42, v[20:21], off offset:-42 ; dc521fd6 2a7c0014
//~gfx11! global_load_b32 v42, v[20:21], off offset:-42 ; dc521fd6 2a7c0014
//~gfx12! global_load_b32 v42, v[20:21], off offset:-42 ; ee05007c 0000002a ffffd614
bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1), -42);
//! global_load_b32 v42, v[20:21], off offset:84 ; dc520054 2a7c0014
//~gfx11! global_load_b32 v42, v[20:21], off offset:84 ; dc520054 2a7c0014
//~gfx12! global_load_b32 v42, v[20:21], off offset:84 ; ee05007c 0000002a 00005414
bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1), 84);
/* Various flags */
//! flat_load_b32 v42, v[20:21] slc ; dc508000 2a7c0014
//~gfx11! flat_load_b32 v42, v[20:21] slc ; dc508000 2a7c0014
//~gfx12! flat_load_b32 v42, v[20:21] scope:SCOPE_SYS ; ec05007c 000c002a 00000014
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().slc = true;
//! flat_load_b32 v42, v[20:21] glc ; dc504000 2a7c0014
//~gfx11! flat_load_b32 v42, v[20:21] glc ; dc504000 2a7c0014
//~gfx12! (then repeated 2 times)
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().glc = true;
//! flat_load_b32 v42, v[20:21] dlc ; dc502000 2a7c0014
//~gfx11! flat_load_b32 v42, v[20:21] dlc ; dc502000 2a7c0014
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().dlc = true;
/* Stores */
//! flat_store_b32 v[20:21], v10 ; dc680000 007c0a14
//~gfx11! flat_store_b32 v[20:21], v10 ; dc680000 007c0a14
//~gfx12! flat_store_b32 v[20:21], v10 scope:SCOPE_SYS ; ec06807c 050c0000 00000014
bld.flat(aco_opcode::flat_store_dword, op_v2, Operand(s1), op_v1);
/* Atomic with return */
//~gfx11! global_atomic_add_u32 v42, v[20:21], v10, off glc ; dcd64000 2a7c0a14
//~gfx12! global_atomic_add_u32 v42, v[20:21], v10, off th:TH_ATOMIC_RETURN ; ee0d407c 0510002a 00000014
bld.global(aco_opcode::global_atomic_add, dst_v1, op_v2, Operand(s1), op_v1)->global().glc =
true;
finish_assembler_test();
}
END_TEST
BEGIN_TEST(assembler.gfx11.exp)
if (!setup_cs(NULL, GFX11))
return;
BEGIN_TEST(assembler.exp)
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
if (!setup_cs(NULL, gfx))
continue;
Operand op[4];
for (unsigned i = 0; i < 4; i++)
@@ -758,24 +910,30 @@ BEGIN_TEST(assembler.gfx11.exp)
Operand op_m0(bld.tmp(s1));
op_m0.setFixed(m0);
//>> exp mrt3 v1, v0, v3, v2 ; f800003f 02030001
//~gfx11>> exp mrt3 v1, v0, v3, v2 ; f800003f 02030001
//~gfx12>> export mrt3 v1, v0, v3, v2 ; f800003f 02030001
bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], 0xf, 3);
//! exp mrt3 v1, off, v0, off ; f8000035 80008001
//~gfx11! exp mrt3 v1, off, v0, off ; f8000035 80008001
//~gfx12! export mrt3 v1, off, v0, off ; f8000035 80008001
bld.exp(aco_opcode::exp, op[1], Operand(v1), op[0], Operand(v1), 0x5, 3);
//! exp mrt3 v1, v0, v3, v2 done ; f800083f 02030001
//~gfx11! exp mrt3 v1, v0, v3, v2 done ; f800083f 02030001
//~gfx12! export mrt3 v1, v0, v3, v2 done ; f800083f 02030001
bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], 0xf, 3, false, true);
//>> exp mrt3 v1, v0, v3, v2 row_en ; f800203f 02030001
//~gfx11! exp mrt3 v1, v0, v3, v2 row_en ; f800203f 02030001
//~gfx12! export mrt3 v1, v0, v3, v2 row_en ; f800203f 02030001
bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], op_m0, 0xf, 3)->exp().row_en = true;
finish_assembler_test();
}
END_TEST
BEGIN_TEST(assembler.gfx11.vinterp)
if (!setup_cs(NULL, GFX11))
return;
BEGIN_TEST(assembler.vinterp)
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
if (!setup_cs(NULL, gfx))
continue;
Definition dst = bld.def(v1);
dst.setFixed(PhysReg(256 + 42));
@@ -837,11 +995,13 @@ BEGIN_TEST(assembler.gfx11.vinterp)
.clamp = true;
finish_assembler_test();
}
END_TEST
BEGIN_TEST(assembler.gfx11.ldsdir)
if (!setup_cs(NULL, GFX11))
return;
BEGIN_TEST(assembler.ldsdir)
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
if (!setup_cs(NULL, gfx))
continue;
Definition dst = bld.def(v1);
dst.setFixed(PhysReg(256 + 42));
@@ -852,39 +1012,52 @@ BEGIN_TEST(assembler.gfx11.ldsdir)
//! llvm_version: #llvm_ver
fprintf(output, "llvm_version: %u\n", LLVM_VERSION_MAJOR);
//>> lds_direct_load v42 wait_vdst:15 ; ce1f002a
//~gfx11>> lds_direct_load v42 wait_vdst:15 ; ce1f002a
//~gfx12>> ds_direct_load v42 wait_va_vdst:15 wait_vm_vsrc:1 ; ce9f002a
bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vdst = 15;
//! lds_direct_load v42 wait_vdst:6 ; ce16002a
//~gfx11! lds_direct_load v42 wait_vdst:6 ; ce16002a
//~gfx12! ds_direct_load v42 wait_va_vdst:6 wait_vm_vsrc:1 ; ce96002a
bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vdst = 6;
//; if llvm_ver >= 18:
//; if llvm_ver >= 18 and variant == 'gfx11':
//; insert_pattern('lds_direct_load v42 wait_vdst:0 ; ce10002a')
//; else:
//; elif variant == 'gfx11':
//; insert_pattern('lds_direct_load v42 ; ce10002a')
//~gfx12! ds_direct_load v42 wait_va_vdst:0 wait_vm_vsrc:1 ; ce90002a
bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vdst = 0;
//! lds_param_load v42, attr56.x wait_vdst:8 ; ce08e02a
//~gfx11! lds_param_load v42, attr56.x wait_vdst:8 ; ce08e02a
//~gfx12! ds_param_load v42, attr56.x wait_va_vdst:8 wait_vm_vsrc:1 ; ce88e02a
bld.ldsdir(aco_opcode::lds_param_load, dst, op, 56, 0)->ldsdir().wait_vdst = 8;
//; if llvm_ver >= 18:
//; if llvm_ver >= 18 and variant == 'gfx11':
//; insert_pattern('lds_param_load v42, attr56.x wait_vdst:0 ; ce00e02a')
//; else:
//; elif variant == 'gfx11':
//; insert_pattern('lds_param_load v42, attr56.x ; ce00e02a')
//~gfx12! ds_param_load v42, attr56.x wait_va_vdst:0 wait_vm_vsrc:1 ; ce80e02a
bld.ldsdir(aco_opcode::lds_param_load, dst, op, 56, 0)->ldsdir().wait_vdst = 0;
//! lds_param_load v42, attr34.y wait_vdst:8 ; ce08892a
//~gfx11! lds_param_load v42, attr34.y wait_vdst:8 ; ce08892a
//~gfx12! ds_param_load v42, attr34.y wait_va_vdst:8 wait_vm_vsrc:1 ; ce88892a
bld.ldsdir(aco_opcode::lds_param_load, dst, op, 34, 1)->ldsdir().wait_vdst = 8;
//! lds_param_load v42, attr12.z wait_vdst:8 ; ce08322a
//~gfx11! lds_param_load v42, attr12.z wait_vdst:8 ; ce08322a
//~gfx12! ds_param_load v42, attr12.z wait_va_vdst:8 wait_vm_vsrc:1 ; ce88322a
bld.ldsdir(aco_opcode::lds_param_load, dst, op, 12, 2)->ldsdir().wait_vdst = 8;
//~gfx11>> lds_direct_load v42 wait_vdst:15 ; ce1f002a
//~gfx12>> ds_direct_load v42 wait_va_vdst:15 wait_vm_vsrc:0 ; ce1f002a
bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vsrc = 0;
finish_assembler_test();
}
END_TEST
BEGIN_TEST(assembler.gfx11.vop12c_v128)
if (!setup_cs(NULL, GFX11))
return;
BEGIN_TEST(assembler.vop12c_v128)
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
if (!setup_cs(NULL, gfx))
continue;
Definition dst_v0 = bld.def(v1);
dst_v0.setFixed(PhysReg(256));
@@ -978,11 +1151,13 @@ BEGIN_TEST(assembler.gfx11.vop12c_v128)
.abs[0] = true;
finish_assembler_test();
}
END_TEST
BEGIN_TEST(assembler.vop3_dpp)
if (!setup_cs(NULL, GFX11))
return;
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
if (!setup_cs(NULL, gfx))
continue;
Definition dst_v0 = bld.def(v1);
dst_v0.setFixed(PhysReg(256));
@@ -1024,8 +1199,9 @@ BEGIN_TEST(assembler.vop3_dpp)
bld.vop1_e64_dpp(aco_opcode::v_sqrt_f32, dst_v0, op_v1, dpp_row_rr(1))->valu().clamp = true;
//! v_cmp_lt_f32_e64_dpp s[4:5], |v1|, |v2| row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d4110304 000204fa ffad2101
bld.vopc_e64_dpp(aco_opcode::v_cmp_lt_f32, dst_non_vcc, op_v1, op_v2, dpp_row_rr(1))->valu().abs =
0x3;
bld.vopc_e64_dpp(aco_opcode::v_cmp_lt_f32, dst_non_vcc, op_v1, op_v2, dpp_row_rr(1))
->valu()
.abs = 0x3;
//! v_add_f32_e64_dpp v0, v1, v2 mul:4 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; d5030000 100204ea 00000001
bld.vop2_e64_dpp8(aco_opcode::v_add_f32, dst_v0, op_v1, op_v2)->valu().omod = 2;
@@ -1037,11 +1213,13 @@ BEGIN_TEST(assembler.vop3_dpp)
bld.vopc_e64_dpp8(aco_opcode::v_cmp_lt_f32, dst_non_vcc, op_v1, op_v2)->valu().abs = 0x1;
finish_assembler_test();
}
END_TEST
BEGIN_TEST(assembler.vopd)
if (!setup_cs(NULL, GFX11))
return;
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
if (!setup_cs(NULL, gfx))
continue;
Definition dst_v0 = bld.def(v1);
dst_v0.setFixed(PhysReg(256));
@@ -1069,7 +1247,8 @@ BEGIN_TEST(assembler.vopd)
//>> BB0:
//! v_dual_mov_b32 v0, v0 :: v_dual_mov_b32 v1, v1 ; ca100100 00000101
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_v0, op_v1, aco_opcode::v_dual_mov_b32);
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_v0, op_v1,
aco_opcode::v_dual_mov_b32);
//! v_dual_mov_b32 v0, 0x60 :: v_dual_mov_b32 v1, s0 ; ca1000ff 00000000 00000060
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, Operand::c32(96), op_s0,
@@ -1095,6 +1274,17 @@ BEGIN_TEST(assembler.vopd)
bld.vopd(aco_opcode::v_dual_cndmask_b32, dst_v0, dst_v1, op_v0, op_v1, op_vcc, op_v2, op_v3,
op_vcc, aco_opcode::v_dual_cndmask_b32);
finish_assembler_test();
}
END_TEST
BEGIN_TEST(assembler.pseudo_scalar_trans)
if (LLVM_VERSION_MAJOR < 19 || !setup_cs(NULL, GFX12))
return;
//>> v_s_sqrt_f32 s5, s1 ; d6880005 00000001
bld.vop3(aco_opcode::v_s_sqrt_f32, Definition(PhysReg(5), s1), Operand(PhysReg(1), s1));
finish_assembler_test();
END_TEST