diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 25c625e577d..eb74d9da931 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -960,6 +960,79 @@ private: }; }; +struct RegisterDemand { + constexpr RegisterDemand() = default; + constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept : vgpr{v}, sgpr{s} {} + int16_t vgpr = 0; + int16_t sgpr = 0; + + constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept + { + return a.vgpr == b.vgpr && a.sgpr == b.sgpr; + } + + constexpr bool exceeds(const RegisterDemand other) const noexcept + { + return vgpr > other.vgpr || sgpr > other.sgpr; + } + + constexpr RegisterDemand operator+(const Temp t) const noexcept + { + if (t.type() == RegType::sgpr) + return RegisterDemand(vgpr, sgpr + t.size()); + else + return RegisterDemand(vgpr + t.size(), sgpr); + } + + constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept + { + return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr); + } + + constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept + { + return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr); + } + + constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept + { + vgpr += other.vgpr; + sgpr += other.sgpr; + return *this; + } + + constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept + { + vgpr -= other.vgpr; + sgpr -= other.sgpr; + return *this; + } + + constexpr RegisterDemand& operator+=(const Temp t) noexcept + { + if (t.type() == RegType::sgpr) + sgpr += t.size(); + else + vgpr += t.size(); + return *this; + } + + constexpr RegisterDemand& operator-=(const Temp t) noexcept + { + if (t.type() == RegType::sgpr) + sgpr -= t.size(); + else + vgpr -= t.size(); + return *this; + } + + constexpr void update(const RegisterDemand other) noexcept + { + vgpr = std::max(vgpr, other.vgpr); + sgpr = std::max(sgpr, other.sgpr); + } +}; + struct Block; struct Instruction; struct Pseudo_instruction; @@ -986,7 +1059,10 @@ struct SDWA_instruction; struct Instruction { aco_opcode opcode; Format format; - uint32_t pass_flags; + union { + uint32_t pass_flags; + RegisterDemand register_demand; + }; aco::span operands; aco::span definitions; @@ -1796,79 +1872,6 @@ enum block_kind { block_kind_end_with_regs = 1 << 15, }; -struct RegisterDemand { - constexpr RegisterDemand() = default; - constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept : vgpr{v}, sgpr{s} {} - int16_t vgpr = 0; - int16_t sgpr = 0; - - constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept - { - return a.vgpr == b.vgpr && a.sgpr == b.sgpr; - } - - constexpr bool exceeds(const RegisterDemand other) const noexcept - { - return vgpr > other.vgpr || sgpr > other.sgpr; - } - - constexpr RegisterDemand operator+(const Temp t) const noexcept - { - if (t.type() == RegType::sgpr) - return RegisterDemand(vgpr, sgpr + t.size()); - else - return RegisterDemand(vgpr + t.size(), sgpr); - } - - constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept - { - return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr); - } - - constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept - { - return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr); - } - - constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept - { - vgpr += other.vgpr; - sgpr += other.sgpr; - return *this; - } - - constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept - { - vgpr -= other.vgpr; - sgpr -= other.sgpr; - return *this; - } - - constexpr RegisterDemand& operator+=(const Temp t) noexcept - { - if (t.type() == RegType::sgpr) - sgpr += t.size(); - else - vgpr += t.size(); - return *this; - } - - constexpr RegisterDemand& operator-=(const Temp t) noexcept - { - if (t.type() == RegType::sgpr) - sgpr -= t.size(); - else - vgpr -= t.size(); - return *this; - } - - constexpr void update(const RegisterDemand other) noexcept - { - vgpr = std::max(vgpr, other.vgpr); - sgpr = std::max(sgpr, other.sgpr); - } -}; - /* CFG */ struct Block { using edge_vec = small_vec; @@ -2063,8 +2066,6 @@ public: monotonic_buffer_resource memory; /* live temps out per block */ std::vector live_out; - /* register demand (sgpr/vgpr) per instruction per block */ - std::vector> register_demand; } live; struct { diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 7eb58b19123..5477f139074 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -95,10 +95,8 @@ void process_live_temps_per_block(Program* program, Block* block, unsigned& worklist, std::vector& phi_info) { - std::vector& register_demand = program->live.register_demand[block->index]; RegisterDemand new_demand; - register_demand.resize(block->instructions.size()); IDSet live = program->live.live_out[block->index]; /* initialize register demand */ @@ -114,7 +112,7 @@ process_live_temps_per_block(Program* program, Block* block, unsigned& worklist, break; program->needs_vcc |= instr_needs_vcc(insn); - register_demand[idx] = RegisterDemand(new_demand.vgpr, new_demand.sgpr); + insn->register_demand = RegisterDemand(new_demand.vgpr, new_demand.sgpr); /* KILL */ for (Definition& definition : insn->definitions) { @@ -131,7 +129,7 @@ process_live_temps_per_block(Program* program, Block* block, unsigned& worklist, new_demand -= temp; definition.setKill(false); } else { - register_demand[idx] += temp; + insn->register_demand += temp; definition.setKill(true); } } @@ -164,21 +162,21 @@ process_live_temps_per_block(Program* program, Block* block, unsigned& worklist, } } if (operand.isLateKill()) - register_demand[idx] += temp; + insn->register_demand += temp; new_demand += temp; } } } RegisterDemand before_instr = new_demand + get_additional_operand_demand(insn); - register_demand[idx].update(before_instr); + insn->register_demand.update(before_instr); } /* handle phi definitions */ uint16_t linear_phi_defs = 0; for (int phi_idx = 0; phi_idx <= idx; phi_idx++) { - register_demand[phi_idx] = new_demand; Instruction* insn = block->instructions[phi_idx].get(); + insn->register_demand = new_demand; assert(is_phi(insn) && insn->definitions.size() == 1); if (!insn->definitions[0].isTemp()) { @@ -443,7 +441,6 @@ live_var_analysis(Program* program) program->live.live_out.clear(); program->live.memory.release(); program->live.live_out.resize(program->blocks.size(), IDSet(program->live.memory)); - program->live.register_demand.resize(program->blocks.size()); unsigned worklist = program->blocks.size(); std::vector phi_info(program->blocks.size()); RegisterDemand new_demand; @@ -459,16 +456,14 @@ live_var_analysis(Program* program) /* Handle branches: we will insert copies created for linear phis just before the branch. */ for (Block& block : program->blocks) { - program->live.register_demand[block.index].back().sgpr += - phi_info[block.index].linear_phi_defs; - program->live.register_demand[block.index].back().sgpr -= - phi_info[block.index].linear_phi_ops; + block.instructions.back()->register_demand.sgpr += phi_info[block.index].linear_phi_defs; + block.instructions.back()->register_demand.sgpr -= phi_info[block.index].linear_phi_ops; /* update block's register demand */ if (program->progress < CompilationProgress::after_ra) { block.register_demand = RegisterDemand(); - for (RegisterDemand& demand : program->live.register_demand[block.index]) - block.register_demand.update(demand); + for (const aco_ptr& instr : block.instructions) + block.register_demand.update(instr->register_demand); } new_demand.update(block.register_demand); diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index 7facf31998b..6b7c047d4d4 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -910,11 +910,10 @@ aco_print_block(enum amd_gfx_level gfx_level, const Block* block, FILE* output, fprintf(output, "\tdemand: %u vgpr, %u sgpr\n", demand.vgpr, demand.sgpr); } - unsigned index = 0; for (auto const& instr : block->instructions) { fprintf(output, "\t"); if (flags & print_live_vars) { - RegisterDemand demand = program->live.register_demand[block->index][index]; + RegisterDemand demand = instr->register_demand; fprintf(output, "(%3u vgpr, %3u sgpr) ", demand.vgpr, demand.sgpr); } if (flags & print_perf_info) @@ -922,7 +921,6 @@ aco_print_block(enum amd_gfx_level gfx_level, const Block* block, FILE* output, aco_print_instr(gfx_level, instr.get(), output, flags); fprintf(output, "\n"); - index++; } } diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 92117ba2235..3ad724007bc 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -58,7 +58,7 @@ struct DownwardsCursor { clause_demand(initial_clause_demand) {} - void verify_invariants(const RegisterDemand* register_demand); + void verify_invariants(const Block* block); }; /** @@ -78,7 +78,7 @@ struct UpwardsCursor { } bool has_insert_idx() const { return insert_idx != -1; } - void verify_invariants(const RegisterDemand* register_demand); + void verify_invariants(const Block* block); }; struct MoveState { @@ -86,7 +86,6 @@ struct MoveState { Block* block; Instruction* current; - RegisterDemand* register_demand; /* demand per instruction */ bool improved_rar; std::vector depends_on; @@ -145,7 +144,7 @@ move_element(T begin_it, size_t idx, size_t before) } void -DownwardsCursor::verify_invariants(const RegisterDemand* register_demand) +DownwardsCursor::verify_invariants(const Block* block) { assert(source_idx < insert_idx_clause); assert(insert_idx_clause < insert_idx); @@ -153,13 +152,13 @@ DownwardsCursor::verify_invariants(const RegisterDemand* register_demand) #ifndef NDEBUG RegisterDemand reference_demand; for (int i = source_idx + 1; i < insert_idx_clause; ++i) { - reference_demand.update(register_demand[i]); + reference_demand.update(block->instructions[i]->register_demand); } assert(total_demand == reference_demand); reference_demand = {}; for (int i = insert_idx_clause; i < insert_idx; ++i) { - reference_demand.update(register_demand[i]); + reference_demand.update(block->instructions[i]->register_demand); } assert(clause_demand == reference_demand); #endif @@ -185,8 +184,8 @@ MoveState::downwards_init(int current_idx, bool improved_rar_, bool may_form_cla } } - DownwardsCursor cursor(current_idx, register_demand[current_idx]); - cursor.verify_invariants(register_demand); + DownwardsCursor cursor(current_idx, block->instructions[current_idx]->register_demand); + cursor.verify_invariants(block); return cursor; } @@ -236,7 +235,8 @@ MoveState::downwards_move(DownwardsCursor& cursor, bool add_to_clause) /* New demand for the moved instruction */ const RegisterDemand temp = get_temp_registers(instr); const RegisterDemand temp2 = get_temp_registers(block->instructions[dest_insert_idx - 1]); - const RegisterDemand new_demand = register_demand[dest_insert_idx - 1] - temp2 + temp; + const RegisterDemand new_demand = + block->instructions[dest_insert_idx - 1]->register_demand - temp2 + temp; if (new_demand.exceeds(max_registers)) return move_fail_pressure; @@ -244,10 +244,9 @@ MoveState::downwards_move(DownwardsCursor& cursor, bool add_to_clause) move_element(block->instructions.begin(), cursor.source_idx, dest_insert_idx); /* update register pressure */ - move_element(register_demand, cursor.source_idx, dest_insert_idx); for (int i = cursor.source_idx; i < dest_insert_idx - 1; i++) - register_demand[i] -= candidate_diff; - register_demand[dest_insert_idx - 1] = new_demand; + block->instructions[i]->register_demand -= candidate_diff; + block->instructions[dest_insert_idx - 1]->register_demand = new_demand; cursor.insert_idx_clause--; if (cursor.source_idx != cursor.insert_idx_clause) { /* Update demand if we moved over any instructions before the clause */ @@ -263,7 +262,7 @@ MoveState::downwards_move(DownwardsCursor& cursor, bool add_to_clause) } cursor.source_idx--; - cursor.verify_invariants(register_demand); + cursor.verify_invariants(block); return move_success; } @@ -281,13 +280,13 @@ MoveState::downwards_skip(DownwardsCursor& cursor) } } } - cursor.total_demand.update(register_demand[cursor.source_idx]); + cursor.total_demand.update(instr->register_demand); cursor.source_idx--; - cursor.verify_invariants(register_demand); + cursor.verify_invariants(block); } void -UpwardsCursor::verify_invariants(const RegisterDemand* register_demand) +UpwardsCursor::verify_invariants(const Block* block) { #ifndef NDEBUG if (!has_insert_idx()) { @@ -298,7 +297,7 @@ UpwardsCursor::verify_invariants(const RegisterDemand* register_demand) RegisterDemand reference_demand; for (int i = insert_idx; i < source_idx; ++i) { - reference_demand.update(register_demand[i]); + reference_demand.update(block->instructions[i]->register_demand); } assert(total_demand == reference_demand); #endif @@ -335,7 +334,7 @@ void MoveState::upwards_update_insert_idx(UpwardsCursor& cursor) { cursor.insert_idx = cursor.source_idx; - cursor.total_demand = register_demand[cursor.insert_idx]; + cursor.total_demand = block->instructions[cursor.insert_idx]->register_demand; } MoveResult @@ -363,7 +362,7 @@ MoveState::upwards_move(UpwardsCursor& cursor) return move_fail_pressure; const RegisterDemand temp2 = get_temp_registers(block->instructions[cursor.insert_idx - 1]); const RegisterDemand new_demand = - register_demand[cursor.insert_idx - 1] - temp2 + candidate_diff + temp; + block->instructions[cursor.insert_idx - 1]->register_demand - temp2 + candidate_diff + temp; if (new_demand.exceeds(max_registers)) return move_fail_pressure; @@ -371,18 +370,17 @@ MoveState::upwards_move(UpwardsCursor& cursor) move_element(block->instructions.begin(), cursor.source_idx, cursor.insert_idx); /* update register pressure */ - move_element(register_demand, cursor.source_idx, cursor.insert_idx); - register_demand[cursor.insert_idx] = new_demand; + block->instructions[cursor.insert_idx]->register_demand = new_demand; for (int i = cursor.insert_idx + 1; i <= cursor.source_idx; i++) - register_demand[i] += candidate_diff; + block->instructions[i]->register_demand += candidate_diff; cursor.total_demand += candidate_diff; - cursor.total_demand.update(register_demand[cursor.source_idx]); + cursor.total_demand.update(block->instructions[cursor.source_idx]->register_demand); cursor.insert_idx++; cursor.source_idx++; - cursor.verify_invariants(register_demand); + cursor.verify_invariants(block); return move_success; } @@ -400,12 +398,12 @@ MoveState::upwards_skip(UpwardsCursor& cursor) if (op.isTemp()) RAR_dependencies[op.tempId()] = true; } - cursor.total_demand.update(register_demand[cursor.source_idx]); + cursor.total_demand.update(instr->register_demand); } cursor.source_idx++; - cursor.verify_invariants(register_demand); + cursor.verify_invariants(block); } bool @@ -1172,7 +1170,6 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block) ctx.last_SMEM_dep_idx = 0; ctx.last_SMEM_stall = INT16_MIN; ctx.mv.block = block; - ctx.mv.register_demand = program->live.register_demand[block->index].data(); /* go through all instructions and find memory loads */ unsigned num_stores = 0; @@ -1225,9 +1222,8 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block) /* resummarize the block's register demand */ block->register_demand = RegisterDemand(); - for (unsigned idx = 0; idx < block->instructions.size(); idx++) { - block->register_demand.update(program->live.register_demand[block->index][idx]); - } + for (const aco_ptr& instr : block->instructions) + block->register_demand.update(instr->register_demand); } } /* end namespace */ @@ -1299,8 +1295,11 @@ schedule_program(Program* program) std::vector block_demands(program->blocks.size()); std::vector> register_demands(program->blocks.size()); for (unsigned j = 0; j < program->blocks.size(); j++) { - block_demands[j] = program->blocks[j].register_demand; - register_demands[j] = program->live.register_demand[j]; + Block &b = program->blocks[j]; + block_demands[j] = b.register_demand; + register_demands[j].reserve(b.instructions.size()); + for (unsigned i = 0; i < b.instructions.size(); i++) + register_demands[j].emplace_back(b.instructions[i]->register_demand); } aco::live_var_analysis(program); @@ -1308,7 +1307,7 @@ schedule_program(Program* program) for (unsigned j = 0; j < program->blocks.size(); j++) { Block &b = program->blocks[j]; for (unsigned i = 0; i < b.instructions.size(); i++) - assert(register_demands[b.index][i] == program->live.register_demand[b.index][i]); + assert(register_demands[j][i] == b.instructions[i]->register_demand); assert(b.register_demand == block_demands[j]); } diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 1404d90a02c..0e2c5822885 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -574,7 +574,6 @@ add_coupling_code(spill_ctx& ctx, Block* block, IDSet& live_in) if (block->linear_preds.size() == 1 && !(block->kind & (block_kind_loop_exit | block_kind_loop_header))) { assert(ctx.processed[block->linear_preds[0]]); - assert(ctx.program->live.register_demand[block_idx].size() == block->instructions.size()); ctx.renames[block_idx] = ctx.renames[block->linear_preds[0]]; if (!block->logical_preds.empty() && block->logical_preds[0] != block->linear_preds[0]) { @@ -872,9 +871,8 @@ add_coupling_code(spill_ctx& ctx, Block* block, IDSet& live_in) phi->operands[i] = Operand(tmp); } phi->definitions[0] = Definition(rename); + phi->register_demand = block->live_in_demand; block->instructions.insert(block->instructions.begin(), std::move(phi)); - ctx.program->live.register_demand[block->index].insert( - ctx.program->live.register_demand[block->index].begin(), block->live_in_demand); } /* the variable was renamed: add new name to renames */ @@ -935,7 +933,7 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand s /* check if register demand is low enough during and after the current instruction */ if (block->register_demand.exceeds(ctx.target_pressure)) { - RegisterDemand new_demand = ctx.program->live.register_demand[block_idx][idx]; + RegisterDemand new_demand = instr->register_demand; /* if reg pressure is too high, spill variable with furthest next use */ while ((new_demand - spilled_registers).exceeds(ctx.target_pressure)) {