From d21054b4bc92a1a9240841dca719f81a142fd5cc Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Fri, 10 Feb 2023 16:22:38 +0100 Subject: [PATCH] r600/sfn: Add pass to split address and index register loads Signed-off-by: Gert Wollny Part-of: --- src/gallium/drivers/r600/meson.build | 2 + .../r600/sfn/sfn_split_address_loads.cpp | 379 ++++++++++++++++ .../r600/sfn/sfn_split_address_loads.h | 38 ++ .../drivers/r600/sfn/tests/meson.build | 2 +- .../tests/sfn_split_address_loads_test.cpp | 412 ++++++++++++++++++ 5 files changed, 832 insertions(+), 1 deletion(-) create mode 100644 src/gallium/drivers/r600/sfn/sfn_split_address_loads.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_split_address_loads.h create mode 100644 src/gallium/drivers/r600/sfn/tests/sfn_split_address_loads_test.cpp diff --git a/src/gallium/drivers/r600/meson.build b/src/gallium/drivers/r600/meson.build index a2b644d8688..85911d66912 100644 --- a/src/gallium/drivers/r600/meson.build +++ b/src/gallium/drivers/r600/meson.build @@ -174,6 +174,8 @@ files_r600 = files( 'sfn/sfn_shader_tess.h', 'sfn/sfn_shader_vs.cpp', 'sfn/sfn_shader_vs.h', + 'sfn/sfn_split_address_loads.cpp', + 'sfn/sfn_split_address_loads.h', 'sfn/sfn_valuefactory.cpp', 'sfn/sfn_valuefactory.h', 'sfn/sfn_virtualvalues.cpp', diff --git a/src/gallium/drivers/r600/sfn/sfn_split_address_loads.cpp b/src/gallium/drivers/r600/sfn/sfn_split_address_loads.cpp new file mode 100644 index 00000000000..11ccae627c9 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_split_address_loads.cpp @@ -0,0 +1,379 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2022 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the 
Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_split_address_loads.h" +#include "r600_isa.h" +#include "sfn_alu_defines.h" +#include "sfn_defines.h" +#include "sfn_instr_alugroup.h" +#include "sfn_instr_fetch.h" +#include "sfn_instr_mem.h" +#include "sfn_instr_tex.h" +#include "sfn_instr_export.h" + +namespace r600 { + + +class AddressSplitVisitor : public InstrVisitor { +public: + AddressSplitVisitor(Shader& sh); + +private: + void visit(AluInstr *instr) override; + void visit(AluGroup *instr) override; + void visit(TexInstr *instr) override; + void visit(ExportInstr *instr) override; + void visit(FetchInstr *instr) override; + void visit(Block *instr) override; + void visit(ControlFlowInstr *instr) override; + void visit(IfInstr *instr) override; + void visit(ScratchIOInstr *instr) override; + void visit(StreamOutInstr *instr) override; + void visit(MemRingOutInstr *instr) override; + void visit(EmitVertexInstr *instr) override; + void visit(GDSInstr *instr) override; + void visit(WriteTFInstr *instr) override; + void visit(LDSAtomicInstr *instr) override; + void visit(LDSReadInstr *instr) override; + void visit(RatInstr *instr) override; + + void load_ar(Instr *instr, PRegister addr); + auto 
load_index_register(Instr *instr, PRegister index) -> int; + auto load_index_register_eg(Instr *instr, PRegister index) -> int; + auto load_index_register_ca(PRegister index) -> int; + auto reuse_loaded_idx(PRegister index) -> int; + auto pick_idx() -> int ; + + ValueFactory& m_vf; + r600_chip_class m_chip_class; + + Block::iterator m_block_iterator; + Block *m_current_block{nullptr}; + PRegister m_current_addr{nullptr}; + PRegister m_current_idx[2] {nullptr, nullptr}; + PRegister m_current_idx_src[2] {nullptr, nullptr}; + + + std::list m_last_ar_use; + AluInstr *m_last_ar_load{nullptr}; + + unsigned m_linear_index{0}; + unsigned m_last_idx_load_index[2] {0,0}; + AluInstr *m_last_idx_load[2] {nullptr, nullptr}; + std::list m_last_idx_use[2]; + Instr *m_last_non_alu{nullptr}; + +}; + + +bool split_address_loads(Shader& sh) +{ + AddressSplitVisitor visitor(sh); + for (auto block : sh.func()) { + block->accept(visitor); + } + return true; +} + +AddressSplitVisitor::AddressSplitVisitor(Shader& sh): + m_vf(sh.value_factory()), + m_chip_class(sh.chip_class()) +{ +} + +class CollectDeps : public ConstRegisterVisitor { +public: + void visit(const Register& r) override + { + for (auto p : r.parents()) + add_dep(p); + } + void visit(const LocalArray& value) override {(void)value; unreachable("Array is not a value");} + void visit(const LocalArrayValue& r) override + { + auto& a = r.array(); + for (auto reg : a) { + if (!instr->dest() || !reg->equal_to(*instr->dest())) { + for (auto p : reg->parents()) { + if ((instr->block_id() == p->block_id()) && + (instr->index() > p->index())) + add_dep(p); + } + } + } + } + void visit(const UniformValue& value) override {(void)value;} + void visit(const LiteralConstant& value) override {(void)value;} + void visit(const InlineConstant& value) override {(void)value;} + + void add_dep(Instr *p) { + + auto alu = p->as_alu(); + if (!alu || alu_level > 1) { + instr->add_required_instr(p); + } else { + ++alu_level; + for (auto& s : 
alu->sources()) { + if (!alu->dest() || !alu->dest()->equal_to(*s)) + s->accept(*this); + } + --alu_level; + } + } + int alu_level{0}; + + AluInstr *instr; +}; + + +void AddressSplitVisitor::visit(AluInstr *instr) +{ + auto [addr, is_for_dest, index] = instr->indirect_addr(); + + if (addr) { + assert(!index); + + if (!m_current_addr || !m_current_addr->equal_to(*addr)) + load_ar(instr, addr); + + // Do this with a visitor to catch also local array values + CollectDeps collector; + collector.instr = m_last_ar_load; + for (auto& s : instr->sources()) { + s->accept(collector); + } + + instr->update_indirect_addr(m_vf.addr()); + addr->del_use(instr); + m_last_ar_load->inc_ar_uses(); + m_last_ar_use.push_back(instr); + } + + if (index) + load_index_register(instr, index); +} + +auto AddressSplitVisitor::load_index_register(Instr *instr, PRegister index) -> int +{ + int idx_id = m_chip_class < ISA_CC_CAYMAN ? + load_index_register_eg(instr, index): + load_index_register_ca(index); + + m_last_idx_use[idx_id].push_back(instr); + + index->del_use(instr); + instr->update_indirect_addr(m_current_idx[idx_id]); + m_last_idx_load_index[idx_id] = (instr->block_id() << 16) | instr->index(); + return idx_id == 0 ? 
bim_zero : bim_one; +} + +auto AddressSplitVisitor::load_index_register_eg(Instr *instr, + PRegister index) -> int +{ + int idx_id = reuse_loaded_idx(index); + if (idx_id < 0) { + load_ar(instr, index); + + idx_id = pick_idx(); + auto idx = m_vf.idx_reg(idx_id); + + const EAluOp idx_op[2] = {op1_set_cf_idx0, op1_set_cf_idx1}; + + m_last_idx_load[idx_id] = new AluInstr(idx_op[idx_id], idx, m_vf.addr(), {}); + m_current_block->insert(m_block_iterator, m_last_idx_load[idx_id]); + for (auto&& i : m_last_idx_use[idx_id]) + m_last_idx_load[idx_id]->add_required_instr(i); + m_last_idx_use[idx_id].clear(); + + m_last_ar_load->inc_ar_uses(); + m_last_ar_use.push_back(m_last_idx_load[idx_id]); + m_current_idx[idx_id] = idx; + m_current_idx_src[idx_id] = index; + + } + return idx_id; +} + +auto AddressSplitVisitor::load_index_register_ca(PRegister index) -> int +{ + int idx_id = reuse_loaded_idx(index); + if (idx_id < 0) { + idx_id = pick_idx(); + auto idx = m_vf.idx_reg(idx_id); + m_last_idx_load[idx_id] = new AluInstr(op1_mova_int, idx, index, {}); + + m_current_block->insert(m_block_iterator, m_last_idx_load[idx_id]); + for (auto&& i : m_last_idx_use[idx_id]) + m_last_idx_load[idx_id]->add_required_instr(i); + m_last_idx_use[idx_id].clear(); + m_current_idx[idx_id] = idx; + m_current_idx_src[idx_id] = index; + + } + return idx_id; +} + +auto AddressSplitVisitor::reuse_loaded_idx(PRegister index) -> int +{ + for (int i = 0; i < 2; ++i) { + if (m_current_idx_src[i] && m_current_idx_src[i]->equal_to(*index)) { + return i; + } + } + return -1; +} + +auto AddressSplitVisitor::pick_idx() -> int +{ + int idx_id = -1; + if (!m_current_idx[0]) { + idx_id = 0; + } else if (!m_current_idx[1]) { + idx_id = 1; + } else { + idx_id = m_last_idx_load_index[0] < m_last_idx_load_index[1] ? 
0 : 1; + } + return idx_id; +} + + +void AddressSplitVisitor::load_ar(Instr *instr, PRegister addr) +{ + auto ar = m_vf.addr(); + + m_last_ar_load = new AluInstr(op1_mova_int, ar, addr, {}); + m_current_block->insert(m_block_iterator, m_last_ar_load); + ar->add_use(instr); + m_current_addr = addr; + for (auto& i : m_last_ar_use) { + m_last_ar_load->add_required_instr(i); + } + if (m_last_non_alu) { + m_last_ar_load->add_required_instr(m_last_non_alu); + } + m_last_ar_use.clear(); +} + + +void AddressSplitVisitor::visit(AluGroup *instr) +{ + for (auto& i : *instr) + if (i) + this->visit(i); +} + +void AddressSplitVisitor::visit(TexInstr *instr) +{ + if (instr->resource_offset()) + load_index_register(instr, instr->resource_offset()); + m_last_non_alu = instr; +} +void AddressSplitVisitor::visit(ExportInstr *instr) +{ + (void)instr; +} + +void AddressSplitVisitor::visit(FetchInstr *instr) +{ + if (instr->resource_offset()) + load_index_register(instr, instr->resource_offset()); + m_last_non_alu = instr; +} + +void AddressSplitVisitor::visit(Block *instr) +{ + m_current_block = instr; + m_block_iterator = instr->begin(); + m_last_ar_load = nullptr; + m_current_addr = nullptr; + m_last_ar_use.clear(); + auto e = instr->end(); + while (m_block_iterator != e) { + (*m_block_iterator)->accept(*this); + ++m_block_iterator; + } + + // renumber instructions + int new_index = 0; + for (auto&& i : *instr) + i->set_blockid(m_current_block->id(), new_index++); +} +void AddressSplitVisitor::visit(ControlFlowInstr *instr) +{ + (void)instr; +} +void AddressSplitVisitor::visit(IfInstr *instr) +{ + visit(instr->predicate()); +} +void AddressSplitVisitor::visit(ScratchIOInstr *instr) +{ + m_last_non_alu = instr; + (void)instr; +} +void AddressSplitVisitor::visit(StreamOutInstr *instr) +{ + m_last_non_alu = instr; + (void)instr; +} +void AddressSplitVisitor::visit(MemRingOutInstr *instr) +{ + m_last_non_alu = instr; + (void)instr; +} +void AddressSplitVisitor::visit(EmitVertexInstr 
*instr) +{ + m_last_non_alu = instr; + (void)instr; +} +void AddressSplitVisitor::visit(GDSInstr *instr) +{ + if (instr->resource_offset()) + load_index_register(instr, instr->resource_offset()); + m_last_non_alu = instr; +} +void AddressSplitVisitor::visit(WriteTFInstr *instr) +{ + m_last_non_alu = instr; + (void)instr; +} + +void AddressSplitVisitor::visit(LDSAtomicInstr *instr) +{ + +} +void AddressSplitVisitor::visit(LDSReadInstr *instr) +{ + +} +void AddressSplitVisitor::visit(RatInstr *instr) +{ + if (instr->resource_offset()) + load_index_register(instr, instr->resource_offset()); + m_last_non_alu = instr; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_split_address_loads.h b/src/gallium/drivers/r600/sfn/sfn_split_address_loads.h new file mode 100644 index 00000000000..b68a832abc5 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_split_address_loads.h @@ -0,0 +1,38 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2022 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef R600_SFN_SPLIT_ADDRESS_LOADS_H
+#define R600_SFN_SPLIT_ADDRESS_LOADS_H
+
+#include "sfn_shader.h"
+
+namespace r600 {
+
+/* Insert explicit loads of the address register and the index registers
+ * in front of the instructions of `sh` that use indirect addressing, and
+ * rewrite these instructions to use the loaded registers. Every block of
+ * sh.func() is processed. The implementation always returns true. */
+bool split_address_loads(Shader& sh);
+
+}
+
+#endif // R600_SFN_SPLIT_ADDRESS_LOADS_H
diff --git a/src/gallium/drivers/r600/sfn/tests/meson.build b/src/gallium/drivers/r600/sfn/tests/meson.build
index 8bda27188bc..d0f4d3b4087 100644
--- a/src/gallium/drivers/r600/sfn/tests/meson.build
+++ b/src/gallium/drivers/r600/sfn/tests/meson.build
@@ -23,7 +23,7 @@ r600_test_dep = declare_dependency(
 
 if with_tests
   foreach t : ['valuefactory', 'value', 'instr', 'instrfromstring', 'liverange',
-               'optimizer', 'shaderfromstring' ]
+               'optimizer', 'shaderfromstring', 'split_address_loads' ]
     test(
       t,
       executable('test-@0@-r600-sfn'.format(t),
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_split_address_loads_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_split_address_loads_test.cpp
new file mode 100644
index 00000000000..5bb81ff20fc
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_split_address_loads_test.cpp
@@ -0,0 +1,412 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2023 Collabora LTD
+ *
+ * Author: Gert Wollny
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * 
paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_test_shaders.h"
+#include "../sfn_split_address_loads.h"
+#include "../sfn_optimizer.h"
+#include "../sfn_scheduler.h"
+
+
+using namespace r600;
+
+/* An indirect array write A1[R0.x] must be turned into a MOVA_INT load of
+ * AR from R0.x followed by the write using A1[AR]. */
+TEST_F(TestShaderFromNir, SimpleLoadAddress)
+{
+   const char *input =
+R"(
+FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.x__
+ARRAYS A1[4].x
+REGISTERS AR
+SHADER
+ALU ADD A1[R0.x].x : L[0xbf000000] KC0[0].x {WL}
+ALU MOV S1.x@group : A1[0].x {WL}
+EXPORT_DONE PIXEL 0 S1.xxxx
+)";
+
+   const char *expect =
+R"(
+FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.x__
+ARRAYS A1[4].x
+REGISTERS AR
+SHADER
+ALU MOVA_INT AR : R0.x
+ALU ADD A1[AR].x : L[0xbf000000] KC0[0].x {WL}
+ALU MOV S1.x@group : A1[0].x {WL}
+EXPORT_DONE PIXEL 0 S1.xxxx
+)";
+
+   auto sh = from_string(input);
+   split_address_loads(*sh);
+   check(sh, expect);
+}
+
+
+/* Indirect array writes inside the IF and the ELSE branch: each branch
+ * must get its own MOVA_INT load of AR, and both writes in a branch share
+ * that one load. */
+TEST_F(TestShaderFromNir, DestIndirectAddress)
+{
+   const char *input =
+      R"(VS
+CHIPCLASS EVERGREEN
+INPUT LOC:0 NAME:0
+OUTPUT LOC:0 NAME:0 MASK:15
+OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10
+OUTPUT LOC:2 NAME:5 MASK:15 SID:10 SPI_SID:11
+OUTPUT LOC:3 NAME:5 MASK:15 SID:11 SPI_SID:12
+OUTPUT LOC:4 NAME:5 MASK:15 SID:12 SPI_SID:13
+REGISTERS R1.xyzw
+ARRAYS A2[4].xy A2[4].zw
+SHADER
+ALU MUL_IEEE S14.x : KC0[2].x R1.y@fully {W}
+ALU MUL_IEEE S14.y : KC0[2].y R1.y@fully {W}
+ALU MUL_IEEE S14.z : KC0[2].z R1.y@fully {W}
+ALU MUL_IEEE S14.w : KC0[2].w R1.y@fully {WL}
+ALU MULADD_IEEE S15.x : KC0[1].x R1.x@fully S14.x {W}
+ALU MULADD_IEEE S15.y : KC0[1].y R1.x@fully S14.y {W}
+ALU MULADD_IEEE S15.z : KC0[1].z R1.x@fully S14.z {W}
+ALU MULADD_IEEE S15.w : KC0[1].w R1.x@fully S14.w {WL}
+ALU MULADD_IEEE S17.x : KC0[3].x R1.z@fully S15.x {W}
+ALU MULADD_IEEE S17.y : KC0[3].y R1.z@fully S15.y {W}
+ALU MULADD_IEEE S17.z : KC0[3].z R1.z@fully S15.z {W}
+ALU MULADD_IEEE S17.w : KC0[3].w R1.z@fully S15.w {WL}
+ALU MULADD_IEEE S19.x@group : KC0[4].x R1.w@fully S17.x {W}
+ALU MULADD_IEEE S19.y@group : KC0[4].y R1.w@fully S17.y {W}
+ALU MULADD_IEEE S19.z@group : KC0[4].z R1.w@fully S17.z {W}
+ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL}
+ALU MOV A2[0].x : I[1.0] {W}
+ALU MOV A2[0].y : L[0x3f8ccccd] {WL}
+ALU MOV A2[1].x : L[0x40000000] {W}
+ALU MOV A2[1].y : L[0x40066666] {WL}
+ALU MOV A2[2].x : L[0x40400000] {W}
+ALU MOV A2[2].y : L[0x40466666] {WL}
+ALU MOV A2[3].x : L[0x40800000] {W}
+ALU MOV A2[3].y : L[0x40833333] {WL}
+ALU MOV A2[0].z : L[0x40a00000] {W}
+ALU MOV A2[0].w : L[0x40a33333] {WL}
+ALU MOV A2[1].z : L[0x40c00000] {W}
+ALU MOV A2[1].w : L[0x40c33333] {WL}
+ALU MOV A2[2].z : L[0x40e00000] {W}
+ALU MOV A2[2].w : L[0x40e33333] {WL}
+ALU MOV A2[3].z : L[0x41000000] {W}
+ALU MOV A2[3].w : L[0x4101999a] {WL}
+IF (( ALU PRED_SETGE_INT __.x@free : KC0[0].x L[0x4] {LEP} PUSH_BEFORE ))
+ ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc] {WL}
+ ALU MOV A2[S34.x].z : I[0] {W}
+ ALU MOV A2[S34.x].w : L[0x3dcccccd] {WL}
+ELSE
+ ALU MOV S37.x : KC0[0].x {WL}
+ ALU MOV A2[S37.x].x : I[0] {W}
+ ALU MOV A2[S37.x].y : L[0x3dcccccd] {WL}
+ENDIF
+EXPORT_DONE POS 0 S19.xyzw
+ALU MOV S46.x@group : A2[0].x {W}
+ALU MOV S46.y@group : A2[0].y {W}
+ALU MOV S46.z@group : A2[1].x {W}
+ALU MOV S46.w@group : A2[1].y {WL}
+EXPORT PARAM 0 S46.xyzw
+ALU MOV S47.x@group : A2[2].x {W}
+ALU MOV S47.y@group : A2[2].y {W}
+ALU MOV S47.z@group : A2[3].x {W}
+ALU MOV S47.w@group : A2[3].y {WL}
+EXPORT PARAM 1 S47.xyzw
+ALU MOV S48.x@group : A2[0].z {W}
+ALU MOV S48.y@group : A2[0].w {W}
+ALU MOV S48.z@group : A2[1].z {W}
+ALU MOV S48.w@group : A2[1].w {WL}
+EXPORT PARAM 2 S48.xyzw
+ALU MOV S49.x@group : A2[2].z {W}
+ALU MOV S49.y@group : A2[2].w {W}
+ALU MOV S49.z@group : A2[3].z {W}
+ALU MOV S49.w@group : A2[3].w {WL}
+EXPORT_DONE PARAM 3 S49.xyzw
+)";
+
+
+   const char *expect =
+      R"(VS
+CHIPCLASS EVERGREEN
+INPUT LOC:0 NAME:0
+OUTPUT LOC:0 NAME:0 MASK:15
+OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10
+OUTPUT LOC:2 NAME:5 MASK:15 SID:10 SPI_SID:11
+OUTPUT LOC:3 NAME:5 MASK:15 SID:11 SPI_SID:12
+OUTPUT LOC:4 NAME:5 MASK:15 SID:12 SPI_SID:13
+REGISTERS R1.xyzw
+ARRAYS A2[4].xy A2[4].zw
+SHADER
+ALU MUL_IEEE S14.x : KC0[2].x R1.y@fully {W}
+ALU MUL_IEEE S14.y : KC0[2].y R1.y@fully {W}
+ALU MUL_IEEE S14.z : KC0[2].z R1.y@fully {W}
+ALU MUL_IEEE S14.w : KC0[2].w R1.y@fully {WL}
+ALU MULADD_IEEE S15.x : KC0[1].x R1.x@fully S14.x {W}
+ALU MULADD_IEEE S15.y : KC0[1].y R1.x@fully S14.y {W}
+ALU MULADD_IEEE S15.z : KC0[1].z R1.x@fully S14.z {W}
+ALU MULADD_IEEE S15.w : KC0[1].w R1.x@fully S14.w {WL}
+ALU MULADD_IEEE S17.x : KC0[3].x R1.z@fully S15.x {W}
+ALU MULADD_IEEE S17.y : KC0[3].y R1.z@fully S15.y {W}
+ALU MULADD_IEEE S17.z : KC0[3].z R1.z@fully S15.z {W}
+ALU MULADD_IEEE S17.w : KC0[3].w R1.z@fully S15.w {WL}
+ALU MULADD_IEEE S19.x@group : KC0[4].x R1.w@fully S17.x {W}
+ALU MULADD_IEEE S19.y@group : KC0[4].y R1.w@fully S17.y {W}
+ALU MULADD_IEEE S19.z@group : KC0[4].z R1.w@fully S17.z {W}
+ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL}
+ALU MOV A2[0].x : I[1.0] {W}
+ALU MOV A2[0].y : L[0x3f8ccccd] {WL}
+ALU MOV A2[1].x : L[0x40000000] {W}
+ALU MOV A2[1].y : L[0x40066666] {WL}
+ALU MOV A2[2].x : L[0x40400000] {W}
+ALU MOV A2[2].y : L[0x40466666] {WL}
+ALU MOV A2[3].x : L[0x40800000] {W}
+ALU MOV A2[3].y : L[0x40833333] {WL}
+ALU MOV A2[0].z : L[0x40a00000] {W}
+ALU MOV A2[0].w : L[0x40a33333] {WL}
+ALU MOV A2[1].z : L[0x40c00000] {W}
+ALU MOV A2[1].w : L[0x40c33333] {WL}
+ALU MOV A2[2].z : L[0x40e00000] {W}
+ALU MOV A2[2].w : L[0x40e33333] {WL}
+ALU MOV A2[3].z : L[0x41000000] {W}
+ALU MOV A2[3].w : L[0x4101999a] {WL}
+IF (( ALU PRED_SETGE_INT __.x@free : KC0[0].x L[0x4] {LEP} PUSH_BEFORE ))
+ ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc] {WL}
+ ALU MOVA_INT AR : S34.x
+ ALU MOV A2[AR].z : I[0] {W}
+ ALU MOV A2[AR].w : L[0x3dcccccd] {WL}
+ELSE
+ ALU MOV S37.x : KC0[0].x {WL}
+ ALU MOVA_INT AR : S37.x
+ ALU MOV A2[AR].x : I[0] {W}
+ ALU MOV A2[AR].y : L[0x3dcccccd] {WL}
+ENDIF
+EXPORT_DONE POS 0 S19.xyzw
+ALU MOV S46.x@group : A2[0].x {W}
+ALU MOV S46.y@group : A2[0].y {W}
+ALU MOV S46.z@group : A2[1].x {W}
+ALU MOV S46.w@group : A2[1].y {WL}
+EXPORT PARAM 0 S46.xyzw
+ALU MOV S47.x@group : A2[2].x {W}
+ALU MOV S47.y@group : A2[2].y {W}
+ALU MOV S47.z@group : A2[3].x {W}
+ALU MOV S47.w@group : A2[3].y {WL}
+EXPORT PARAM 1 S47.xyzw
+ALU MOV S48.x@group : A2[0].z {W}
+ALU MOV S48.y@group : A2[0].w {W}
+ALU MOV S48.z@group : A2[1].z {W}
+ALU MOV S48.w@group : A2[1].w {WL}
+EXPORT PARAM 2 S48.xyzw
+ALU MOV S49.x@group : A2[2].z {W}
+ALU MOV S49.y@group : A2[2].w {W}
+ALU MOV S49.z@group : A2[3].z {W}
+ALU MOV S49.w@group : A2[3].w {WL}
+EXPORT_DONE PARAM 3 S49.xyzw
+)";
+
+   auto sh = from_string(input);
+   split_address_loads(*sh);
+   check(sh, expect);
+}
+
+
+
+
+/* EVERGREEN: an indirect constant buffer index KC0[R0.x] must be loaded
+ * through AR (MOVA_INT) and then copied to IDX0 with SET_CF_IDX0. */
+TEST_F(TestShaderFromNir, SimpleLoadIndexEG)
+{
+   const char *input =
+R"(
+FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.x
+ARRAYS A1[4].x
+REGISTERS AR
+SHADER
+ALU ADD S1.x : L[0xbf000000] KC0[R0.x][0].x {WL}
+EXPORT_DONE PIXEL 0 S1.xxxx
+)";
+
+   const char *expect =
+R"(
+FS
+CHIPCLASS EVERGREEN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.x
+ARRAYS A1[4].x
+REGISTERS AR
+SHADER
+ALU MOVA_INT AR : R0.x
+ALU SET_CF_IDX0 IDX0 : AR
+ALU ADD S1.x@group : L[0xbf000000] KC0[IDX0][0].x {WL}
+EXPORT_DONE PIXEL 0 S1.xxxx
+)";
+
+   auto sh = from_string(input);
+   split_address_loads(*sh);
+   check(sh, expect);
+}
+
+/* CAYMAN: the same indirect constant buffer index is loaded into IDX0
+ * with a single MOVA_INT, without going through AR. */
+TEST_F(TestShaderFromNir, SimpleLoadIndexCA)
+{
+   const char *input =
+R"(
+FS
+CHIPCLASS CAYMAN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.x
+ARRAYS A1[4].x
+REGISTERS AR
+SHADER
+ALU ADD S1.x : L[0xbf000000] KC0[R0.x][0].x {WL}
+EXPORT_DONE PIXEL 0 S1.xxxx
+)";
+
+   const char *expect =
+R"(
+FS
+CHIPCLASS CAYMAN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.x
+ARRAYS A1[4].x
+REGISTERS AR
+SHADER
+ALU MOVA_INT IDX0 : R0.x
+ALU ADD S1.x@group : L[0xbf000000] KC0[IDX0][0].x {WL}
+EXPORT_DONE PIXEL 0 S1.xxxx
+)";
+
+   auto sh = from_string(input);
+   split_address_loads(*sh);
+   check(sh, expect);
+}
+
+
+/* CAYMAN: the resource offset R0.y of a LOAD_BUF must be loaded into IDX0
+ * and the fetch must reference IDX0 instead. */
+TEST_F(TestShaderFromNir, SimpleLoadIndexBuf)
+{
+   const char *input =
+R"(
+FS
+CHIPCLASS CAYMAN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.x R0.y
+REGISTERS AR
+SHADER
+LOAD_BUF S1.xyzw : R0.x + 16b RID:10 + R0.y
+EXPORT_DONE PIXEL 0 S1.xyzw
+)";
+
+   const char *expect =
+R"(
+FS
+CHIPCLASS CAYMAN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SYSVALUES R0.x R0.y
+SHADER
+ALU MOVA_INT IDX0 : R0.y
+LOAD_BUF S1.xyzw : R0.x + 16b RID:10 + IDX0
+EXPORT_DONE PIXEL 0 S1.xyzw
+)";
+
+   auto sh = from_string(input);
+   split_address_loads(*sh);
+   check(sh, expect);
+}
+
+
+/* CAYMAN: four ALU instructions that index KC1 with the same register
+ * must share one MOVA_INT load of IDX0. */
+TEST_F(TestShaderFromNir, SplitLoadIndexConst)
+{
+   const char *input =
+R"(
+FS
+CHIPCLASS CAYMAN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:0
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+BLOCK_START
+ ALU MIN_UINT S3.w@free{s} : KC0[0].x L[0x2] {WL}
+ ALU MOV S4.x@group{s} : KC1[S3.w@free{s}][0].x {W}
+ ALU MOV S4.y@group{s} : KC1[S3.w@free{s}][0].y {W}
+ ALU MOV S4.z@group{s} : KC1[S3.w@free{s}][0].z {W}
+ ALU MOV S4.w@group{s} : KC1[S3.w@free{s}][0].w {WL}
+ EXPORT_DONE PIXEL 0 S4.xyzw
+BLOCK_END
+)";
+
+   const char *expect =
+R"(
+FS
+CHIPCLASS CAYMAN
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+PROP WRITE_ALL_COLORS:0
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+BLOCK_START
+ ALU MIN_UINT S3.w@free{s} : KC0[0].x L[0x2] {WL}
+ ALU MOVA_INT IDX0 : S3.w@free{s} {}
+ ALU MOV S4.x@group{s} : KC1[IDX0][0].x {W}
+ ALU MOV S4.y@group{s} : KC1[IDX0][0].y {W}
+ ALU MOV S4.z@group{s} : KC1[IDX0][0].z {W}
+ ALU MOV S4.w@group{s} : KC1[IDX0][0].w {WL}
+ EXPORT_DONE PIXEL 0 S4.xyzw
+BLOCK_END
+)";
+   auto sh = from_string(input);
+   split_address_loads(*sh);
+   check(sh, expect);
+}
+
+
+
+