aco/tests: add GFX12 assembler tests
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Acked-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29162>
This commit is contained in:
@@ -557,7 +557,7 @@ formats = [("pseudo", [Format.PSEUDO], list(itertools.product(range(5), range(6)
|
||||
("smem", [Format.SMEM], [(0, 4), (0, 3), (1, 0), (1, 3), (1, 2), (1, 1), (0, 0)]),
|
||||
("ds", [Format.DS], [(1, 1), (1, 2), (1, 3), (0, 3), (0, 4)]),
|
||||
("ldsdir", [Format.LDSDIR], [(1, 1)]),
|
||||
("mubuf", [Format.MUBUF], [(0, 4), (1, 3)]),
|
||||
("mubuf", [Format.MUBUF], [(0, 4), (1, 3), (1, 4)]),
|
||||
("mtbuf", [Format.MTBUF], [(0, 4), (1, 3)]),
|
||||
("mimg", [Format.MIMG], itertools.product([0, 1], [3, 4, 5, 6, 7])),
|
||||
("exp", [Format.EXP], [(0, 4), (0, 5)]),
|
||||
@@ -594,9 +594,9 @@ formats = [("pseudo", [Format.PSEUDO], list(itertools.product(range(5), range(6)
|
||||
("vop1_e64_dpp8", [Format.VOP1, Format.VOP3, Format.DPP8], itertools.product([1], [1])),
|
||||
("vop2_e64_dpp8", [Format.VOP2, Format.VOP3, Format.DPP8], itertools.product([1, 2], [2, 3])),
|
||||
("vopc_e64_dpp8", [Format.VOPC, Format.VOP3, Format.DPP8], itertools.product([1, 2], [2])),
|
||||
("flat", [Format.FLAT], [(0, 3), (1, 2)]),
|
||||
("global", [Format.GLOBAL], [(0, 3), (1, 2)]),
|
||||
("scratch", [Format.SCRATCH], [(0, 3), (1, 2)])]
|
||||
("flat", [Format.FLAT], [(0, 3), (1, 2), (1, 3)]),
|
||||
("global", [Format.GLOBAL], [(0, 3), (1, 2), (1, 3)]),
|
||||
("scratch", [Format.SCRATCH], [(0, 3), (1, 2), (1, 3)])]
|
||||
formats = [(f if len(f) == 5 else f + ('',)) for f in formats]
|
||||
%>\\
|
||||
% for name, formats, shapes, extra_field_setup in formats:
|
||||
|
@@ -10,6 +10,17 @@
|
||||
|
||||
using namespace aco;
|
||||
|
||||
static std::vector<amd_gfx_level>
|
||||
filter_gfx_levels(std::vector<amd_gfx_level> src)
|
||||
{
|
||||
std::vector<amd_gfx_level> res;
|
||||
for (amd_gfx_level gfx : src) {
|
||||
if (gfx < GFX12 || LLVM_VERSION_MAJOR >= 19)
|
||||
res.push_back(gfx);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
BEGIN_TEST(assembler.s_memtime)
|
||||
for (unsigned i = GFX6; i <= GFX10; i++) {
|
||||
if (!setup_cs(NULL, (amd_gfx_level)i))
|
||||
@@ -365,9 +376,10 @@ BEGIN_TEST(assembler.vopc_sdwa)
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.gfx11.smem)
|
||||
if (!setup_cs(NULL, GFX11))
|
||||
return;
|
||||
BEGIN_TEST(assembler.smem)
|
||||
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
|
||||
if (!setup_cs(NULL, gfx))
|
||||
continue;
|
||||
|
||||
Definition dst = bld.def(s1);
|
||||
dst.setFixed(PhysReg(4));
|
||||
@@ -381,30 +393,36 @@ BEGIN_TEST(assembler.gfx11.smem)
|
||||
Operand op_s4(bld.tmp(s4));
|
||||
op_s4.setFixed(PhysReg(32));
|
||||
|
||||
//>> s_dcache_inv ; f4840000 f8000000
|
||||
//~gfx11>> s_dcache_inv ; f4840000 f8000000
|
||||
//~gfx12>> s_dcache_inv ; f4042000 f8000000
|
||||
bld.smem(aco_opcode::s_dcache_inv);
|
||||
|
||||
//! s_load_b32 s4, s[16:17], 0x2a ; f4000108 f800002a
|
||||
bld.smem(aco_opcode::s_load_dword, dst, op_s2, Operand::c32(42));
|
||||
|
||||
//! s_load_b32 s4, s[16:17], s8 ; f4000108 10000000
|
||||
//~gfx11! s_load_b32 s4, s[16:17], s8 ; f4000108 10000000
|
||||
//~gfx12! s_load_b32 s4, s[16:17], s8 offset:0x0 ; f4000108 10000000
|
||||
bld.smem(aco_opcode::s_load_dword, dst, op_s2, op_s1);
|
||||
|
||||
//! s_load_b32 s4, s[16:17], s8 offset:0x2a ; f4000108 1000002a
|
||||
bld.smem(aco_opcode::s_load_dword, dst, op_s2, Operand::c32(42), op_s1);
|
||||
|
||||
//! s_buffer_load_b32 s4, s[32:35], s8 glc ; f4204110 10000000
|
||||
//~gfx11! s_buffer_load_b32 s4, s[32:35], s8 glc ; f4204110 10000000
|
||||
//~gfx12! s_buffer_load_b32 s4, s[32:35], s8 offset:0x0 scope:SCOPE_SYS ; f4620110 10000000
|
||||
bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().glc = true;
|
||||
|
||||
//! s_buffer_load_b32 s4, s[32:35], s8 dlc ; f4202110 10000000
|
||||
//~gfx11! s_buffer_load_b32 s4, s[32:35], s8 dlc ; f4202110 10000000
|
||||
//~gfx12! (then repeated 1 times)
|
||||
bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().dlc = true;
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.gfx11.mubuf)
|
||||
if (!setup_cs(NULL, GFX11))
|
||||
return;
|
||||
BEGIN_TEST(assembler.mubuf)
|
||||
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
|
||||
if (!setup_cs(NULL, gfx))
|
||||
continue;
|
||||
|
||||
Definition dst = bld.def(v1);
|
||||
dst.setFixed(PhysReg(256 + 42));
|
||||
@@ -428,94 +446,128 @@ BEGIN_TEST(assembler.gfx11.mubuf)
|
||||
fprintf(output, "llvm_version: %u\n", LLVM_VERSION_MAJOR);
|
||||
|
||||
/* Addressing */
|
||||
//>> buffer_load_b32 v42, off, s[32:35], s30 ; e0500000 1e082a80
|
||||
//~gfx11>> buffer_load_b32 v42, off, s[32:35], s30 ; e0500000 1e082a80
|
||||
//~gfx12>> buffer_load_b32 v42, off, s[32:35], s30 ; c405001e 0080402a 00000000
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 0, false);
|
||||
|
||||
//! buffer_load_b32 v42, off, s[32:35], 42 ; e0500000 aa082a80
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::c32(42), 0, false);
|
||||
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 ; e0500000 80082a80
|
||||
//~gfx12! buffer_load_b32 v42, off, s[32:35], null ; c405007c 0080402a 00000000
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false);
|
||||
|
||||
//! buffer_load_b32 v42, v10, s[32:35], s30 offen ; e0500000 1e482a0a
|
||||
//~gfx11! buffer_load_b32 v42, off, s[32:35], 42 ; e0500000 aa082a80
|
||||
if (gfx == GFX11)
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::c32(42), 0,
|
||||
false);
|
||||
|
||||
//~gfx11! buffer_load_b32 v42, v10, s[32:35], s30 offen ; e0500000 1e482a0a
|
||||
//~gfx12! buffer_load_b32 v42, v10, s[32:35], s30 offen ; c405001e 4080402a 0000000a
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, true);
|
||||
|
||||
//! buffer_load_b32 v42, v10, s[32:35], s30 idxen ; e0500000 1e882a0a
|
||||
//~gfx11! buffer_load_b32 v42, v10, s[32:35], s30 idxen ; e0500000 1e882a0a
|
||||
//~gfx12! buffer_load_b32 v42, v10, s[32:35], s30 idxen ; c405001e 8080402a 0000000a
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false)->mubuf().idxen =
|
||||
true;
|
||||
|
||||
//! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen ; e0500000 1ec82a14
|
||||
//~gfx11! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen ; e0500000 1ec82a14
|
||||
//~gfx12! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen ; c405001e c080402a 00000014
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true)->mubuf().idxen =
|
||||
true;
|
||||
|
||||
//! buffer_load_b32 v42, off, s[32:35], s30 offset:84 ; e0500054 1e082a80
|
||||
//~gfx11! buffer_load_b32 v42, off, s[32:35], s30 offset:84 ; e0500054 1e082a80
|
||||
//~gfx12! buffer_load_b32 v42, off, s[32:35], s30 offset:84 ; c405001e 0080402a 00005400
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false);
|
||||
|
||||
/* Various flags */
|
||||
//! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80
|
||||
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80
|
||||
//~gfx12! buffer_load_b32 v42, off, s[32:35], null scope:SCOPE_SYS ; c405007c 008c402a 00000000
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
|
||||
->mubuf()
|
||||
.glc = true;
|
||||
|
||||
//! buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80
|
||||
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80
|
||||
//~gfx12! (then repeated 2 times)
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
|
||||
->mubuf()
|
||||
.dlc = true;
|
||||
|
||||
//! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80
|
||||
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
|
||||
->mubuf()
|
||||
.slc = true;
|
||||
|
||||
//; if llvm_ver >= 16:
|
||||
//; if llvm_ver >= 16 and variant == 'gfx11':
|
||||
//; insert_pattern('buffer_load_b32 v[42:43], off, s[32:35], 0 tfe ; e0500000 80282a80')
|
||||
//; else:
|
||||
//; elif variant == 'gfx11':
|
||||
//; insert_pattern('buffer_load_b32 v42, off, s[32:35], 0 tfe ; e0500000 80282a80')
|
||||
//~gfx12! buffer_load_b32 v[42:43], off, s[32:35], null tfe ; c445007c 0080402a 00000000
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
|
||||
->mubuf()
|
||||
.tfe = true;
|
||||
|
||||
/* LDS */
|
||||
//! buffer_load_lds_b32 off, s[32:35], 0 ; e0c40000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
|
||||
if (gfx == GFX11) {
|
||||
//~gfx11! buffer_load_lds_b32 off, s[32:35], 0 ; e0c40000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0,
|
||||
false)
|
||||
->mubuf()
|
||||
.lds = true;
|
||||
|
||||
//! buffer_load_lds_i8 off, s[32:35], 0 ; e0b80000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
|
||||
//~gfx11! buffer_load_lds_i8 off, s[32:35], 0 ; e0b80000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0,
|
||||
false)
|
||||
->mubuf()
|
||||
.lds = true;
|
||||
|
||||
//! buffer_load_lds_i16 off, s[32:35], 0 ; e0c00000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
|
||||
//~gfx11! buffer_load_lds_i16 off, s[32:35], 0 ; e0c00000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0,
|
||||
false)
|
||||
->mubuf()
|
||||
.lds = true;
|
||||
|
||||
//! buffer_load_lds_u8 off, s[32:35], 0 ; e0b40000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
|
||||
//~gfx11! buffer_load_lds_u8 off, s[32:35], 0 ; e0b40000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0,
|
||||
false)
|
||||
->mubuf()
|
||||
.lds = true;
|
||||
|
||||
//! buffer_load_lds_u16 off, s[32:35], 0 ; e0bc0000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
|
||||
//~gfx11! buffer_load_lds_u16 off, s[32:35], 0 ; e0bc0000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0,
|
||||
false)
|
||||
->mubuf()
|
||||
.lds = true;
|
||||
|
||||
//! buffer_load_lds_format_x off, s[32:35], 0 ; e0c80000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
|
||||
//~gfx11! buffer_load_lds_format_x off, s[32:35], 0 ; e0c80000 80080080
|
||||
bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0,
|
||||
false)
|
||||
->mubuf()
|
||||
.lds = true;
|
||||
}
|
||||
|
||||
/* Stores */
|
||||
//! buffer_store_b32 v10, off, s[32:35], s30 ; e0680000 1e080a80
|
||||
//~gfx11! buffer_store_b32 v10, off, s[32:35], s30 ; e0680000 1e080a80
|
||||
//~gfx12! buffer_store_b32 v10, off, s[32:35], s30 scope:SCOPE_SYS ; c406801e 008c400a 00000000
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, op_s4, Operand(v1), op_s1, op_v1, 0, false);
|
||||
|
||||
//! buffer_store_b64 v[20:21], v10, s[32:35], s30 offen ; e06c0000 1e48140a
|
||||
//~gfx11! buffer_store_b64 v[20:21], v10, s[32:35], s30 offen ; e06c0000 1e48140a
|
||||
//~gfx12! buffer_store_b64 v[20:21], v10, s[32:35], s30 offen scope:SCOPE_SYS ; c406c01e 408c4014 0000000a
|
||||
bld.mubuf(aco_opcode::buffer_store_dwordx2, op_s4, op_v1, op_s1, op_v2, 0, true);
|
||||
|
||||
/* Atomic with return */
|
||||
//~gfx11! buffer_atomic_add_u32 v10, off, s[32:35], 0 glc ; e0d44000 80080a80
|
||||
//~gfx12! buffer_atomic_add_u32 v10, off, s[32:35], null th:TH_ATOMIC_RETURN ; c40d407c 0090400a 00000000
|
||||
bld.mubuf(aco_opcode::buffer_atomic_add, Definition(op_v1.physReg(), v1), op_s4, Operand(v1),
|
||||
Operand::zero(), op_v1, 0, false)
|
||||
->mubuf()
|
||||
.glc = true;
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.gfx11.mtbuf)
|
||||
if (!setup_cs(NULL, GFX11))
|
||||
return;
|
||||
BEGIN_TEST(assembler.mtbuf)
|
||||
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
|
||||
if (!setup_cs(NULL, gfx))
|
||||
continue;
|
||||
|
||||
Definition dst = bld.def(v1);
|
||||
dst.setFixed(PhysReg(256 + 42));
|
||||
@@ -539,73 +591,92 @@ BEGIN_TEST(assembler.gfx11.mtbuf)
|
||||
fprintf(output, "llvm_version: %u\n", LLVM_VERSION_MAJOR);
|
||||
|
||||
/* Addressing */
|
||||
//>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9900000 1e082a80
|
||||
//~gfx11>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9900000 1e082a80
|
||||
//~gfx12>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; c420001e 1900402a 00000080
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 0,
|
||||
false);
|
||||
|
||||
//! tbuffer_load_format_x v42, off, s[32:35], 42 format:[BUF_FMT_32_32_FLOAT] ; e9900000 aa082a80
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42), dfmt,
|
||||
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80082a80
|
||||
//~gfx12! tbuffer_load_format_x v42, off, s[32:35], null format:[BUF_FMT_32_32_FLOAT] ; c420007c 1900402a 00000080
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
|
||||
nfmt, 0, false);
|
||||
|
||||
//! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9900000 1e482a0a
|
||||
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 42 format:[BUF_FMT_32_32_FLOAT] ; e9900000 aa082a80
|
||||
if (gfx == GFX11)
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42),
|
||||
dfmt, nfmt, 0, false);
|
||||
|
||||
//~gfx11! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9900000 1e482a0a
|
||||
//~gfx12! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; c420001e 5900402a 0000000a
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, true);
|
||||
|
||||
//! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; e9900000 1e882a0a
|
||||
//~gfx11! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; e9900000 1e882a0a
|
||||
//~gfx12! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; c420001e 9900402a 0000000a
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.idxen = true;
|
||||
|
||||
//! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; e9900000 1ec82a14
|
||||
//~gfx11! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; e9900000 1ec82a14
|
||||
//~gfx12! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; c420001e d900402a 00000014
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true)
|
||||
->mtbuf()
|
||||
.idxen = true;
|
||||
|
||||
//! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; e9900054 1e082a80
|
||||
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; e9900054 1e082a80
|
||||
//~gfx12! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; c420001e 1900402a 00005480
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84,
|
||||
false);
|
||||
|
||||
/* Various flags */
|
||||
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80
|
||||
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80
|
||||
//~gfx12! tbuffer_load_format_x v42, off, s[32:35], null format:[BUF_FMT_32_32_FLOAT] scope:SCOPE_SYS ; c420007c 190c402a 00000080
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
|
||||
nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.glc = true;
|
||||
|
||||
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80
|
||||
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80
|
||||
//~gfx12! (then repeated 2 times)
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
|
||||
nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.dlc = true;
|
||||
|
||||
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80
|
||||
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
|
||||
nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.slc = true;
|
||||
|
||||
//; if llvm_ver >= 16:
|
||||
//; if llvm_ver >= 16 and variant == 'gfx11':
|
||||
//; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80282a80')
|
||||
//; else:
|
||||
//; elif variant == 'gfx11':
|
||||
//; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] tfe ; e9900000 80282a80')
|
||||
//~gfx12! tbuffer_load_format_x v42, off, s[32:35], null format:[BUF_FMT_32_32_FLOAT] ; c460007c 1900402a 00000080
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
|
||||
nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.tfe = true;
|
||||
|
||||
/* Stores */
|
||||
//! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9920000 1e080a80
|
||||
//~gfx11! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9920000 1e080a80
|
||||
//~gfx12! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] scope:SCOPE_SYS ; c421001e 190c400a 00000080
|
||||
bld.mtbuf(aco_opcode::tbuffer_store_format_x, op_s4, Operand(v1), op_s1, op_v1, dfmt, nfmt, 0,
|
||||
false);
|
||||
|
||||
//! tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9928000 1e48140a
|
||||
bld.mtbuf(aco_opcode::tbuffer_store_format_xy, op_s4, op_v1, op_s1, op_v2, dfmt, nfmt, 0, true);
|
||||
//~gfx11! tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9928000 1e48140a
|
||||
//~gfx12! tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen scope:SCOPE_SYS ; c421401e 590c4014 0000000a
|
||||
bld.mtbuf(aco_opcode::tbuffer_store_format_xy, op_s4, op_v1, op_s1, op_v2, dfmt, nfmt, 0,
|
||||
true);
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.gfx11.mimg)
|
||||
if (!setup_cs(NULL, GFX11))
|
||||
return;
|
||||
BEGIN_TEST(assembler.mimg)
|
||||
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
|
||||
if (!setup_cs(NULL, gfx))
|
||||
continue;
|
||||
|
||||
Definition dst_v1 = bld.def(v1);
|
||||
dst_v1.setFixed(PhysReg(256 + 42));
|
||||
@@ -628,64 +699,125 @@ BEGIN_TEST(assembler.gfx11.mimg)
|
||||
Operand op_v4(bld.tmp(v4));
|
||||
op_v4.setFixed(PhysReg(256 + 30));
|
||||
|
||||
//>> image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D ; f06c0f00 2010540a
|
||||
//~gfx11>> image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D ; f06c0f00 2010540a
|
||||
//~gfx12>> image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D ; e7c6c000 10008054 0000000a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1);
|
||||
|
||||
//! image_sample v[84:87], v[20:21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f04 20105414
|
||||
//~gfx11! image_sample v[84:87], v[20:21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f04 20105414
|
||||
//~gfx12! image_sample v[84:87], [v20, v21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; e7c6c001 10008054 00001514
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2)->mimg().dim =
|
||||
ac_image_2d;
|
||||
|
||||
//! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; f06c0100 20102a0a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1)->mimg().dmask = 0x1;
|
||||
//~gfx11! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; f06c0100 20102a0a
|
||||
//~gfx12! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; e446c000 1000802a 0000000a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1)->mimg().dmask =
|
||||
0x1;
|
||||
|
||||
/* Various flags */
|
||||
//! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D dlc ; f06c2f00 2010540a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().dlc = true;
|
||||
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D dlc ; f06c2f00 2010540a
|
||||
//~gfx12! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D scope:SCOPE_SYS ; e7c6c000 100c8054 0000000a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().dlc =
|
||||
true;
|
||||
|
||||
//! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; f06c4f00 2010540a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().glc = true;
|
||||
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; f06c4f00 2010540a
|
||||
//~gfx12! (then repeated 2 times)
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().glc =
|
||||
true;
|
||||
|
||||
//! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; f06c1f00 2010540a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().slc = true;
|
||||
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; f06c1f00 2010540a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().slc =
|
||||
true;
|
||||
|
||||
//! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; f06c0f00 2030540a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().tfe = true;
|
||||
//~gfx11! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; f06c0f00 2030540a
|
||||
//~gfx12! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; e7c6c008 10008054 0000000a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().tfe =
|
||||
true;
|
||||
|
||||
//! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; f06c0f00 2050540a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().lwe = true;
|
||||
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; f06c0f00 2050540a
|
||||
//~gfx12! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; e7c6c000 10008154 0000000a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().lwe =
|
||||
true;
|
||||
|
||||
//! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D r128 ; f06c8f00 2010540a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().r128 = true;
|
||||
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D r128 ; f06c8f00 2010540a
|
||||
//~gfx12! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D r128 ; e7c6c010 10008054 0000000a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().r128 =
|
||||
true;
|
||||
|
||||
//! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; f06d0f00 2010540a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().a16 = true;
|
||||
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; f06d0f00 2010540a
|
||||
//~gfx12! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; e7c6c040 10008054 0000000a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().a16 =
|
||||
true;
|
||||
|
||||
//! image_sample v[84:85], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D d16 ; f06e0f00 2010540a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().d16 = true;
|
||||
//~gfx11! image_sample v[84:85], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D d16 ; f06e0f00 2010540a
|
||||
//~gfx12! image_sample v[84:85], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D d16 ; e7c6c020 10008054 0000000a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().d16 =
|
||||
true;
|
||||
|
||||
/* NSA */
|
||||
//! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f05 2010540a 00000028
|
||||
//~gfx11! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f05 2010540a 00000028
|
||||
//~gfx12! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; e7c6c001 10008054 0000280a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1,
|
||||
Operand(bld.tmp(v1), PhysReg(256 + 40)))
|
||||
->mimg()
|
||||
.dim = ac_image_2d;
|
||||
|
||||
//~gfx11! image_bvh_intersect_ray v[84:87], [v40, v42, v[44:46], v[48:50], v[52:54]], s[32:35] ; f0648f81 00085428 34302c2a
|
||||
//~gfx12! image_bvh_intersect_ray v[84:87], [v40, v42, v[44:46], v[48:50], v[52:54]], s[32:35] ; d3c64010 34004054 302c2a28
|
||||
aco_ptr<Instruction> instr{
|
||||
create_instruction(aco_opcode::image_bvh_intersect_ray, Format::MIMG, 8, 1)};
|
||||
instr->definitions[0] = dst_v4;
|
||||
instr->operands[0] = op_s4;
|
||||
instr->operands[1] = Operand(s4);
|
||||
instr->operands[2] = Operand(v1);
|
||||
instr->operands[3] = Operand(PhysReg(256 + 40), v1); /* node */
|
||||
instr->operands[4] = Operand(PhysReg(256 + 42), v1); /* tmax */
|
||||
instr->operands[5] = Operand(PhysReg(256 + 44), v3); /* origin */
|
||||
instr->operands[6] = Operand(PhysReg(256 + 48), v3); /* dir */
|
||||
instr->operands[7] = Operand(PhysReg(256 + 52), v3); /* inv dir */
|
||||
instr->mimg().dmask = 0xf;
|
||||
instr->mimg().unrm = true;
|
||||
instr->mimg().r128 = true;
|
||||
bld.insert(std::move(instr));
|
||||
|
||||
/* Stores */
|
||||
//! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D ; f0180f00 00101e0a
|
||||
//~gfx11! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D ; f0180f00 00101e0a
|
||||
//~gfx12! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D scope:SCOPE_SYS ; d3c18000 000c801e 0000000a
|
||||
bld.mimg(aco_opcode::image_store, op_s8, Operand(s4), op_v4, op_v1);
|
||||
|
||||
//! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0300f04 00100a14
|
||||
//~gfx11! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0300f04 00100a14
|
||||
//~gfx12! image_atomic_add_uint v10, [v20, v21, v0, v0], s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; d3c30001 0000800a 00001514
|
||||
bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4),
|
||||
op_v1, op_v2)
|
||||
->mimg()
|
||||
.dim = ac_image_2d;
|
||||
|
||||
/* Atomic with return */
|
||||
//~gfx11! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D glc ; f0304f04 00100a14
|
||||
//~gfx12! image_atomic_add_uint v10, [v20, v21, v0, v0], s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN ; d3c30001 0010800a 00001514
|
||||
bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4),
|
||||
op_v1, op_v2, 0xf, false, false, false, true)
|
||||
->mimg()
|
||||
.dim = ac_image_2d;
|
||||
|
||||
//~gfx11! image_load v[84:87], v[20:21], s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0000f04 00105414
|
||||
//~gfx12! image_load v[84:87], [v20, v21], s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; d3c00001 00008054 00001514
|
||||
bld.mimg(aco_opcode::image_load, dst_v4, op_s8, Operand(s4), Operand(v1), op_v2)->mimg().dim =
|
||||
ac_image_2d;
|
||||
|
||||
//~gfx11! image_msaa_load v[84:87], v[30:33], s[64:71] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; f060011c 0010541e
|
||||
//~gfx12! image_msaa_load v[84:87], [v30, v31, v32, v33], s[64:71] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; e4460007 00008054 21201f1e
|
||||
bld.mimg(aco_opcode::image_msaa_load, dst_v4, op_s8, Operand(s4), Operand(v1), op_v4, 0x1)
|
||||
->mimg()
|
||||
.dim = ac_image_2darraymsaa;
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.gfx11.flat)
|
||||
if (!setup_cs(NULL, GFX11))
|
||||
return;
|
||||
BEGIN_TEST(assembler.flat)
|
||||
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
|
||||
if (!setup_cs(NULL, gfx))
|
||||
continue;
|
||||
|
||||
Definition dst_v1 = bld.def(v1);
|
||||
dst_v1.setFixed(PhysReg(256 + 42));
|
||||
@@ -703,53 +835,73 @@ BEGIN_TEST(assembler.gfx11.flat)
|
||||
op_v2.setFixed(PhysReg(256 + 20));
|
||||
|
||||
/* Addressing */
|
||||
//>> flat_load_b32 v42, v[20:21] ; dc500000 2a7c0014
|
||||
//~gfx11>> flat_load_b32 v42, v[20:21] ; dc500000 2a7c0014
|
||||
//~gfx12>> flat_load_b32 v42, v[20:21] ; ec05007c 0000002a 00000014
|
||||
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1));
|
||||
|
||||
//! global_load_b32 v42, v[20:21], off ; dc520000 2a7c0014
|
||||
//~gfx11! global_load_b32 v42, v[20:21], off ; dc520000 2a7c0014
|
||||
//~gfx12! global_load_b32 v42, v[20:21], off ; ee05007c 0000002a 00000014
|
||||
bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1));
|
||||
|
||||
//! global_load_b32 v42, v10, s[64:65] ; dc520000 2a40000a
|
||||
//~gfx11! global_load_b32 v42, v10, s[64:65] ; dc520000 2a40000a
|
||||
//~gfx12! global_load_b32 v42, v10, s[64:65] ; ee050040 0000002a 0000000a
|
||||
bld.global(aco_opcode::global_load_dword, dst_v1, op_v1, op_s2);
|
||||
|
||||
//! scratch_load_b32 v42, v10, off ; dc510000 2afc000a
|
||||
//~gfx11! scratch_load_b32 v42, v10, off ; dc510000 2afc000a
|
||||
//~gfx12! scratch_load_b32 v42, v10, off ; ed05007c 0002002a 0000000a
|
||||
bld.scratch(aco_opcode::scratch_load_dword, dst_v1, op_v1, Operand(s1));
|
||||
|
||||
//! scratch_load_b32 v42, off, s32 ; dc510000 2a200080
|
||||
//~gfx11! scratch_load_b32 v42, off, s32 ; dc510000 2a200080
|
||||
//~gfx12! scratch_load_b32 v42, off, s32 ; ed050020 0000002a 00000000
|
||||
bld.scratch(aco_opcode::scratch_load_dword, dst_v1, Operand(v1), op_s1);
|
||||
|
||||
//! scratch_load_b32 v42, v10, s32 ; dc510000 2aa0000a
|
||||
//~gfx11! scratch_load_b32 v42, v10, s32 ; dc510000 2aa0000a
|
||||
//~gfx12! scratch_load_b32 v42, v10, s32 ; ed050020 0002002a 0000000a
|
||||
bld.scratch(aco_opcode::scratch_load_dword, dst_v1, op_v1, op_s1);
|
||||
|
||||
//! scratch_load_b32 v42, off, off ; dc510000 2a7c0080
|
||||
//~gfx11! scratch_load_b32 v42, off, off ; dc510000 2a7c0080
|
||||
//~gfx12! scratch_load_b32 v42, off, off ; ed05007c 0000002a 00000000
|
||||
bld.scratch(aco_opcode::scratch_load_dword, dst_v1, Operand(v1), Operand(s1));
|
||||
|
||||
//! global_load_b32 v42, v[20:21], off offset:-42 ; dc521fd6 2a7c0014
|
||||
//~gfx11! global_load_b32 v42, v[20:21], off offset:-42 ; dc521fd6 2a7c0014
|
||||
//~gfx12! global_load_b32 v42, v[20:21], off offset:-42 ; ee05007c 0000002a ffffd614
|
||||
bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1), -42);
|
||||
|
||||
//! global_load_b32 v42, v[20:21], off offset:84 ; dc520054 2a7c0014
|
||||
//~gfx11! global_load_b32 v42, v[20:21], off offset:84 ; dc520054 2a7c0014
|
||||
//~gfx12! global_load_b32 v42, v[20:21], off offset:84 ; ee05007c 0000002a 00005414
|
||||
bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1), 84);
|
||||
|
||||
/* Various flags */
|
||||
//! flat_load_b32 v42, v[20:21] slc ; dc508000 2a7c0014
|
||||
//~gfx11! flat_load_b32 v42, v[20:21] slc ; dc508000 2a7c0014
|
||||
//~gfx12! flat_load_b32 v42, v[20:21] scope:SCOPE_SYS ; ec05007c 000c002a 00000014
|
||||
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().slc = true;
|
||||
|
||||
//! flat_load_b32 v42, v[20:21] glc ; dc504000 2a7c0014
|
||||
//~gfx11! flat_load_b32 v42, v[20:21] glc ; dc504000 2a7c0014
|
||||
//~gfx12! (then repeated 2 times)
|
||||
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().glc = true;
|
||||
|
||||
//! flat_load_b32 v42, v[20:21] dlc ; dc502000 2a7c0014
|
||||
//~gfx11! flat_load_b32 v42, v[20:21] dlc ; dc502000 2a7c0014
|
||||
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().dlc = true;
|
||||
|
||||
/* Stores */
|
||||
//! flat_store_b32 v[20:21], v10 ; dc680000 007c0a14
|
||||
//~gfx11! flat_store_b32 v[20:21], v10 ; dc680000 007c0a14
|
||||
//~gfx12! flat_store_b32 v[20:21], v10 scope:SCOPE_SYS ; ec06807c 050c0000 00000014
|
||||
bld.flat(aco_opcode::flat_store_dword, op_v2, Operand(s1), op_v1);
|
||||
|
||||
/* Atomic with return */
|
||||
//~gfx11! global_atomic_add_u32 v42, v[20:21], v10, off glc ; dcd64000 2a7c0a14
|
||||
//~gfx12! global_atomic_add_u32 v42, v[20:21], v10, off th:TH_ATOMIC_RETURN ; ee0d407c 0510002a 00000014
|
||||
bld.global(aco_opcode::global_atomic_add, dst_v1, op_v2, Operand(s1), op_v1)->global().glc =
|
||||
true;
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.gfx11.exp)
|
||||
if (!setup_cs(NULL, GFX11))
|
||||
return;
|
||||
BEGIN_TEST(assembler.exp)
|
||||
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
|
||||
if (!setup_cs(NULL, gfx))
|
||||
continue;
|
||||
|
||||
Operand op[4];
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
@@ -758,24 +910,30 @@ BEGIN_TEST(assembler.gfx11.exp)
|
||||
Operand op_m0(bld.tmp(s1));
|
||||
op_m0.setFixed(m0);
|
||||
|
||||
//>> exp mrt3 v1, v0, v3, v2 ; f800003f 02030001
|
||||
//~gfx11>> exp mrt3 v1, v0, v3, v2 ; f800003f 02030001
|
||||
//~gfx12>> export mrt3 v1, v0, v3, v2 ; f800003f 02030001
|
||||
bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], 0xf, 3);
|
||||
|
||||
//! exp mrt3 v1, off, v0, off ; f8000035 80008001
|
||||
//~gfx11! exp mrt3 v1, off, v0, off ; f8000035 80008001
|
||||
//~gfx12! export mrt3 v1, off, v0, off ; f8000035 80008001
|
||||
bld.exp(aco_opcode::exp, op[1], Operand(v1), op[0], Operand(v1), 0x5, 3);
|
||||
|
||||
//! exp mrt3 v1, v0, v3, v2 done ; f800083f 02030001
|
||||
//~gfx11! exp mrt3 v1, v0, v3, v2 done ; f800083f 02030001
|
||||
//~gfx12! export mrt3 v1, v0, v3, v2 done ; f800083f 02030001
|
||||
bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], 0xf, 3, false, true);
|
||||
|
||||
//>> exp mrt3 v1, v0, v3, v2 row_en ; f800203f 02030001
|
||||
//~gfx11! exp mrt3 v1, v0, v3, v2 row_en ; f800203f 02030001
|
||||
//~gfx12! export mrt3 v1, v0, v3, v2 row_en ; f800203f 02030001
|
||||
bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], op_m0, 0xf, 3)->exp().row_en = true;
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.gfx11.vinterp)
|
||||
if (!setup_cs(NULL, GFX11))
|
||||
return;
|
||||
BEGIN_TEST(assembler.vinterp)
|
||||
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
|
||||
if (!setup_cs(NULL, gfx))
|
||||
continue;
|
||||
|
||||
Definition dst = bld.def(v1);
|
||||
dst.setFixed(PhysReg(256 + 42));
|
||||
@@ -837,11 +995,13 @@ BEGIN_TEST(assembler.gfx11.vinterp)
|
||||
.clamp = true;
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.gfx11.ldsdir)
|
||||
if (!setup_cs(NULL, GFX11))
|
||||
return;
|
||||
BEGIN_TEST(assembler.ldsdir)
|
||||
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
|
||||
if (!setup_cs(NULL, gfx))
|
||||
continue;
|
||||
|
||||
Definition dst = bld.def(v1);
|
||||
dst.setFixed(PhysReg(256 + 42));
|
||||
@@ -852,39 +1012,52 @@ BEGIN_TEST(assembler.gfx11.ldsdir)
|
||||
//! llvm_version: #llvm_ver
|
||||
fprintf(output, "llvm_version: %u\n", LLVM_VERSION_MAJOR);
|
||||
|
||||
//>> lds_direct_load v42 wait_vdst:15 ; ce1f002a
|
||||
//~gfx11>> lds_direct_load v42 wait_vdst:15 ; ce1f002a
|
||||
//~gfx12>> ds_direct_load v42 wait_va_vdst:15 wait_vm_vsrc:1 ; ce9f002a
|
||||
bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vdst = 15;
|
||||
|
||||
//! lds_direct_load v42 wait_vdst:6 ; ce16002a
|
||||
//~gfx11! lds_direct_load v42 wait_vdst:6 ; ce16002a
|
||||
//~gfx12! ds_direct_load v42 wait_va_vdst:6 wait_vm_vsrc:1 ; ce96002a
|
||||
bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vdst = 6;
|
||||
|
||||
//; if llvm_ver >= 18:
|
||||
//; if llvm_ver >= 18 and variant == 'gfx11':
|
||||
//; insert_pattern('lds_direct_load v42 wait_vdst:0 ; ce10002a')
|
||||
//; else:
|
||||
//; elif variant == 'gfx11':
|
||||
//; insert_pattern('lds_direct_load v42 ; ce10002a')
|
||||
//~gfx12! ds_direct_load v42 wait_va_vdst:0 wait_vm_vsrc:1 ; ce90002a
|
||||
bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vdst = 0;
|
||||
|
||||
//! lds_param_load v42, attr56.x wait_vdst:8 ; ce08e02a
|
||||
//~gfx11! lds_param_load v42, attr56.x wait_vdst:8 ; ce08e02a
|
||||
//~gfx12! ds_param_load v42, attr56.x wait_va_vdst:8 wait_vm_vsrc:1 ; ce88e02a
|
||||
bld.ldsdir(aco_opcode::lds_param_load, dst, op, 56, 0)->ldsdir().wait_vdst = 8;
|
||||
|
||||
//; if llvm_ver >= 18:
|
||||
//; if llvm_ver >= 18 and variant == 'gfx11':
|
||||
//; insert_pattern('lds_param_load v42, attr56.x wait_vdst:0 ; ce00e02a')
|
||||
//; else:
|
||||
//; elif variant == 'gfx11':
|
||||
//; insert_pattern('lds_param_load v42, attr56.x ; ce00e02a')
|
||||
//~gfx12! ds_param_load v42, attr56.x wait_va_vdst:0 wait_vm_vsrc:1 ; ce80e02a
|
||||
bld.ldsdir(aco_opcode::lds_param_load, dst, op, 56, 0)->ldsdir().wait_vdst = 0;
|
||||
|
||||
//! lds_param_load v42, attr34.y wait_vdst:8 ; ce08892a
|
||||
//~gfx11! lds_param_load v42, attr34.y wait_vdst:8 ; ce08892a
|
||||
//~gfx12! ds_param_load v42, attr34.y wait_va_vdst:8 wait_vm_vsrc:1 ; ce88892a
|
||||
bld.ldsdir(aco_opcode::lds_param_load, dst, op, 34, 1)->ldsdir().wait_vdst = 8;
|
||||
|
||||
//! lds_param_load v42, attr12.z wait_vdst:8 ; ce08322a
|
||||
//~gfx11! lds_param_load v42, attr12.z wait_vdst:8 ; ce08322a
|
||||
//~gfx12! ds_param_load v42, attr12.z wait_va_vdst:8 wait_vm_vsrc:1 ; ce88322a
|
||||
bld.ldsdir(aco_opcode::lds_param_load, dst, op, 12, 2)->ldsdir().wait_vdst = 8;
|
||||
|
||||
//~gfx11>> lds_direct_load v42 wait_vdst:15 ; ce1f002a
|
||||
//~gfx12>> ds_direct_load v42 wait_va_vdst:15 wait_vm_vsrc:0 ; ce1f002a
|
||||
bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vsrc = 0;
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.gfx11.vop12c_v128)
|
||||
if (!setup_cs(NULL, GFX11))
|
||||
return;
|
||||
BEGIN_TEST(assembler.vop12c_v128)
|
||||
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
|
||||
if (!setup_cs(NULL, gfx))
|
||||
continue;
|
||||
|
||||
Definition dst_v0 = bld.def(v1);
|
||||
dst_v0.setFixed(PhysReg(256));
|
||||
@@ -978,11 +1151,13 @@ BEGIN_TEST(assembler.gfx11.vop12c_v128)
|
||||
.abs[0] = true;
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.vop3_dpp)
|
||||
if (!setup_cs(NULL, GFX11))
|
||||
return;
|
||||
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
|
||||
if (!setup_cs(NULL, gfx))
|
||||
continue;
|
||||
|
||||
Definition dst_v0 = bld.def(v1);
|
||||
dst_v0.setFixed(PhysReg(256));
|
||||
@@ -1024,8 +1199,9 @@ BEGIN_TEST(assembler.vop3_dpp)
|
||||
bld.vop1_e64_dpp(aco_opcode::v_sqrt_f32, dst_v0, op_v1, dpp_row_rr(1))->valu().clamp = true;
|
||||
|
||||
//! v_cmp_lt_f32_e64_dpp s[4:5], |v1|, |v2| row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d4110304 000204fa ffad2101
|
||||
bld.vopc_e64_dpp(aco_opcode::v_cmp_lt_f32, dst_non_vcc, op_v1, op_v2, dpp_row_rr(1))->valu().abs =
|
||||
0x3;
|
||||
bld.vopc_e64_dpp(aco_opcode::v_cmp_lt_f32, dst_non_vcc, op_v1, op_v2, dpp_row_rr(1))
|
||||
->valu()
|
||||
.abs = 0x3;
|
||||
|
||||
//! v_add_f32_e64_dpp v0, v1, v2 mul:4 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; d5030000 100204ea 00000001
|
||||
bld.vop2_e64_dpp8(aco_opcode::v_add_f32, dst_v0, op_v1, op_v2)->valu().omod = 2;
|
||||
@@ -1037,11 +1213,13 @@ BEGIN_TEST(assembler.vop3_dpp)
|
||||
bld.vopc_e64_dpp8(aco_opcode::v_cmp_lt_f32, dst_non_vcc, op_v1, op_v2)->valu().abs = 0x1;
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.vopd)
|
||||
if (!setup_cs(NULL, GFX11))
|
||||
return;
|
||||
for (amd_gfx_level gfx : filter_gfx_levels({GFX11, GFX12})) {
|
||||
if (!setup_cs(NULL, gfx))
|
||||
continue;
|
||||
|
||||
Definition dst_v0 = bld.def(v1);
|
||||
dst_v0.setFixed(PhysReg(256));
|
||||
@@ -1069,7 +1247,8 @@ BEGIN_TEST(assembler.vopd)
|
||||
|
||||
//>> BB0:
|
||||
//! v_dual_mov_b32 v0, v0 :: v_dual_mov_b32 v1, v1 ; ca100100 00000101
|
||||
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_v0, op_v1, aco_opcode::v_dual_mov_b32);
|
||||
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_v0, op_v1,
|
||||
aco_opcode::v_dual_mov_b32);
|
||||
|
||||
//! v_dual_mov_b32 v0, 0x60 :: v_dual_mov_b32 v1, s0 ; ca1000ff 00000000 00000060
|
||||
bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, Operand::c32(96), op_s0,
|
||||
@@ -1095,6 +1274,17 @@ BEGIN_TEST(assembler.vopd)
|
||||
bld.vopd(aco_opcode::v_dual_cndmask_b32, dst_v0, dst_v1, op_v0, op_v1, op_vcc, op_v2, op_v3,
|
||||
op_vcc, aco_opcode::v_dual_cndmask_b32);
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
/* Assembler test "pseudo_scalar_trans": builds a single VOP3 v_s_sqrt_f32 whose
 * definition is fixed to physical register 5 and whose operand is fixed to
 * physical register 1 (both of register class s1). The check pattern below
 * expects it to appear as `v_s_sqrt_f32 s5, s1` with encoding d6880005 00000001. */
BEGIN_TEST(assembler.pseudo_scalar_trans)
|
||||
/* Only runs for GFX12, and only when built against LLVM 19 or newer; otherwise
 * the test returns without emitting anything. NOTE(review): presumably older
 * LLVM cannot disassemble this instruction -- confirm. */
if (LLVM_VERSION_MAJOR < 19 || !setup_cs(NULL, GFX12))
|
||||
return;
|
||||
|
||||
//>> v_s_sqrt_f32 s5, s1 ; d6880005 00000001
|
||||
bld.vop3(aco_opcode::v_s_sqrt_f32, Definition(PhysReg(5), s1), Operand(PhysReg(1), s1));
|
||||
|
||||
/* NOTE(review): presumably assembles the program and emits the disassembly that
 * the pattern line above is matched against -- helper is defined elsewhere. */
finish_assembler_test();
|
||||
END_TEST
|
||||
|
||||
|
Reference in New Issue
Block a user