From 45e935800a813a05e6b74ae1f7e8dfa44b24dcdb Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sat, 7 Sep 2024 15:05:25 +0200
Subject: [PATCH] aco: implement nir_shared_append/consume_amd

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_builder_h.py             |  2 +-
 .../compiler/aco_instruction_selection.cpp    | 28 +++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py
index f13f1624c75..e0019a114cc 100644
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -558,7 +558,7 @@ formats = [("pseudo", [Format.PSEUDO], list(itertools.product(range(5), range(6)
            ("sopp", [Format.SOPP], itertools.product([0, 1], [0, 1])),
            ("sopc", [Format.SOPC], [(1, 2)]),
            ("smem", [Format.SMEM], [(0, 4), (0, 3), (1, 0), (1, 3), (1, 2), (1, 1), (0, 0)]),
-           ("ds", [Format.DS], [(1, 1), (1, 2), (1, 3), (0, 3), (0, 4)]),
+           ("ds", [Format.DS], [(1, 0), (1, 1), (1, 2), (1, 3), (0, 3), (0, 4)]),
            ("ldsdir", [Format.LDSDIR], [(1, 1)]),
            ("mubuf", [Format.MUBUF], [(0, 4), (1, 3), (1, 4)]),
            ("mtbuf", [Format.MTBUF], [(0, 4), (1, 3)]),
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index b0c662ef78c..74dec748d65 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7572,6 +7572,32 @@ visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
    ctx->block->instructions.emplace_back(std::move(ds));
 }
 
+void
+visit_shared_append(isel_context* ctx, nir_intrinsic_instr* instr)
+{
+   Builder bld(ctx->program, ctx->block);
+   unsigned address = nir_intrinsic_base(instr);
+   assert(address <= 65535 && (address % 4 == 0));
+
+   aco_opcode op;
+   switch (instr->intrinsic) {
+   case nir_intrinsic_shared_append_amd: op = aco_opcode::ds_append; break;
+   case nir_intrinsic_shared_consume_amd: op = aco_opcode::ds_consume; break;
+   default: unreachable("not shared_append/consume");
+   }
+
+   Temp tmp = bld.tmp(v1);
+   Instruction *ds;
+   Operand m = load_lds_size_m0(bld);
+   if (m.isUndefined())
+      ds = bld.ds(op, Definition(tmp), address);
+   else
+      ds = bld.ds(op, Definition(tmp), m, address);
+   ds->ds().sync = memory_sync_info(storage_shared, semantic_atomicrmw);
+
+   bld.pseudo(aco_opcode::p_as_uniform, Definition(get_ssa_temp(ctx, &instr->def)), tmp);
+}
+
 void
 visit_access_shared2_amd(isel_context* ctx, nir_intrinsic_instr* instr)
 {
@@ -8324,6 +8350,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
    case nir_intrinsic_store_shared: visit_store_shared(ctx, instr); break;
    case nir_intrinsic_shared_atomic:
    case nir_intrinsic_shared_atomic_swap: visit_shared_atomic(ctx, instr); break;
+   case nir_intrinsic_shared_append_amd:
+   case nir_intrinsic_shared_consume_amd: visit_shared_append(ctx, instr); break;
    case nir_intrinsic_load_shared2_amd:
    case nir_intrinsic_store_shared2_amd: visit_access_shared2_amd(ctx, instr); break;
    case nir_intrinsic_bindless_image_load: