From 27ce5d921ef00c46d531df1b60f9e7d6d588b0e0 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 8 Oct 2020 15:11:12 +0100 Subject: [PATCH] aco: remove isel_context::allocated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have Program::temp_rc, we can replace it with the first temporary id allocated for NIR's ssa defs. No fossil-db changes on Navi. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- .../compiler/aco_instruction_selection.cpp | 8 ++--- src/amd/compiler/aco_instruction_selection.h | 2 +- .../aco_instruction_selection_setup.cpp | 36 +++++++++---------- src/amd/compiler/aco_ir.h | 12 ++++--- 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index b5dc49a52d6..38ea21c3817 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -121,8 +121,8 @@ static void append_logical_end(Block *b) Temp get_ssa_temp(struct isel_context *ctx, nir_ssa_def *def) { - assert(ctx->allocated[def->index].id()); - return ctx->allocated[def->index]; + uint32_t id = ctx->first_temp_id + def->index; + return Temp(id, ctx->program->temp_rc[id]); } Temp emit_mbcnt(isel_context *ctx, Temp dst, Operand mask = Operand(), Operand base = Operand(0u)) @@ -923,8 +923,8 @@ void emit_comparison(isel_context *ctx, nir_alu_instr *instr, Temp dst, aco_opcode v_op = instr->src[0].src.ssa->bit_size == 64 ? v64_op : instr->src[0].src.ssa->bit_size == 32 ? 
v32_op : v16_op; bool use_valu = s_op == aco_opcode::num_opcodes || nir_dest_is_divergent(instr->dest.dest) || - ctx->allocated[instr->src[0].src.ssa->index].type() == RegType::vgpr || - ctx->allocated[instr->src[1].src.ssa->index].type() == RegType::vgpr; + get_ssa_temp(ctx, instr->src[0].src.ssa).type() == RegType::vgpr || + get_ssa_temp(ctx, instr->src[1].src.ssa).type() == RegType::vgpr; aco_opcode op = use_valu ? v_op : s_op; assert(op != aco_opcode::num_opcodes); assert(dst.regClass() == ctx->program->lane_mask); diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h index 107d116d9fd..cdb19ead984 100644 --- a/src/amd/compiler/aco_instruction_selection.h +++ b/src/amd/compiler/aco_instruction_selection.h @@ -59,7 +59,7 @@ struct isel_context { nir_shader *shader; uint32_t constant_data_offset; Block *block; - std::unique_ptr<Temp[]> allocated; + uint32_t first_temp_id; std::unordered_map<unsigned, std::array<Temp,NIR_MAX_VEC_COMPONENTS>> allocated_vec; Stage stage; bool has_gfx10_wave64_bpermute = false; diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 0492ca60890..a59af50e9b8 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -649,7 +649,9 @@ void init_context(isel_context *ctx, nir_shader *shader) nir_print_shader(shader, stderr); } - std::unique_ptr<Temp[]> allocated{new Temp[impl->ssa_alloc]()}; + ctx->first_temp_id = ctx->program->peekAllocationId(); + ctx->program->allocateRange(impl->ssa_alloc); + RegClass *regclasses = ctx->program->temp_rc.data() + ctx->first_temp_id; unsigned spi_ps_inputs = 0; @@ -736,21 +738,21 @@ void init_context(isel_context *ctx, nir_shader *shader) /* fallthrough */ default: for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++) { - if (allocated[alu_instr->src[i].src.ssa->index].type() == RegType::vgpr) + if (regclasses[alu_instr->src[i].src.ssa->index].type() == 
RegType::vgpr) type = RegType::vgpr; } break; } RegClass rc = get_reg_class(ctx, type, alu_instr->dest.dest.ssa.num_components, alu_instr->dest.dest.ssa.bit_size); - allocated[alu_instr->dest.dest.ssa.index] = Temp(0, rc); + regclasses[alu_instr->dest.dest.ssa.index] = rc; break; } case nir_instr_type_load_const: { unsigned num_components = nir_instr_as_load_const(instr)->def.num_components; unsigned bit_size = nir_instr_as_load_const(instr)->def.bit_size; RegClass rc = get_reg_class(ctx, RegType::sgpr, num_components, bit_size); - allocated[nir_instr_as_load_const(instr)->def.index] = Temp(0, rc); + regclasses[nir_instr_as_load_const(instr)->def.index] = rc; break; } case nir_instr_type_intrinsic: { @@ -870,13 +872,13 @@ void init_context(isel_context *ctx, nir_shader *shader) break; default: for (unsigned i = 0; i < nir_intrinsic_infos[intrinsic->intrinsic].num_srcs; i++) { - if (allocated[intrinsic->src[i].ssa->index].type() == RegType::vgpr) + if (regclasses[intrinsic->src[i].ssa->index].type() == RegType::vgpr) type = RegType::vgpr; } break; } RegClass rc = get_reg_class(ctx, type, intrinsic->dest.ssa.num_components, intrinsic->dest.ssa.bit_size); - allocated[intrinsic->dest.ssa.index] = Temp(0, rc); + regclasses[intrinsic->dest.ssa.index] = rc; switch(intrinsic->intrinsic) { case nir_intrinsic_load_barycentric_sample: @@ -926,12 +928,12 @@ void init_context(isel_context *ctx, nir_shader *shader) RegClass rc = get_reg_class(ctx, type, tex->dest.ssa.num_components, tex->dest.ssa.bit_size); - allocated[tex->dest.ssa.index] = Temp(0, rc); + regclasses[tex->dest.ssa.index] = rc; break; } case nir_instr_type_parallel_copy: { nir_foreach_parallel_copy_entry(entry, nir_instr_as_parallel_copy(instr)) { - allocated[entry->dest.ssa.index] = allocated[entry->src.ssa->index]; + regclasses[entry->dest.ssa.index] = regclasses[entry->src.ssa->index]; } break; } @@ -939,7 +941,7 @@ void init_context(isel_context *ctx, nir_shader *shader) unsigned num_components = 
nir_instr_as_ssa_undef(instr)->def.num_components; unsigned bit_size = nir_instr_as_ssa_undef(instr)->def.bit_size; RegClass rc = get_reg_class(ctx, RegType::sgpr, num_components, bit_size); - allocated[nir_instr_as_ssa_undef(instr)->def.index] = Temp(0, rc); + regclasses[nir_instr_as_ssa_undef(instr)->def.index] = rc; break; } case nir_instr_type_phi: { @@ -951,7 +953,7 @@ void init_context(isel_context *ctx, nir_shader *shader) assert(size == 1 && "multiple components not yet supported on boolean phis."); type = RegType::sgpr; size *= lane_mask_size; - allocated[phi->dest.ssa.index] = Temp(0, RegClass(type, size)); + regclasses[phi->dest.ssa.index] = RegClass(type, size); break; } @@ -960,21 +962,21 @@ void init_context(isel_context *ctx, nir_shader *shader) } else { type = RegType::sgpr; nir_foreach_phi_src (src, phi) { - if (allocated[src->src.ssa->index].type() == RegType::vgpr) + if (regclasses[src->src.ssa->index].type() == RegType::vgpr) type = RegType::vgpr; - if (allocated[src->src.ssa->index].type() == RegType::none) + if (regclasses[src->src.ssa->index].type() == RegType::none) done = false; } } RegClass rc = get_reg_class(ctx, type, phi->dest.ssa.num_components, phi->dest.ssa.bit_size); - if (rc != allocated[phi->dest.ssa.index].regClass()) { + if (rc != regclasses[phi->dest.ssa.index]) { done = false; } else { nir_foreach_phi_src(src, phi) - assert(allocated[src->src.ssa->index].size() == rc.size()); + assert(regclasses[src->src.ssa->index].size() == rc.size()); } - allocated[phi->dest.ssa.index] = Temp(0, rc); + regclasses[phi->dest.ssa.index] = rc; break; } default: @@ -997,10 +999,6 @@ void init_context(isel_context *ctx, nir_shader *shader) ctx->program->config->spi_ps_input_ena = spi_ps_inputs; ctx->program->config->spi_ps_input_addr = spi_ps_inputs; - for (unsigned i = 0; i < impl->ssa_alloc; i++) - allocated[i] = ctx->program->allocateTmp(allocated[i].regClass()); - - ctx->allocated.reset(allocated.release()); 
ctx->cf_info.nir_to_aco.reset(nir_to_aco.release()); /* align and copy constant data */ diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 96c20c0b8aa..4535d26e5e9 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1669,6 +1669,13 @@ public: return allocationID++; } + void allocateRange(unsigned amount) + { + assert(allocationID + amount <= 16777216); + temp_rc.resize(temp_rc.size() + amount); + allocationID += amount; + } + Temp allocateTmp(RegClass rc) { return Temp(allocateId(rc), rc); @@ -1679,11 +1686,6 @@ public: return allocationID; } - void setAllocationId(uint32_t id) - { - allocationID = id; - } - Block* create_and_insert_block() { blocks.emplace_back(blocks.size()); blocks.back().fp_mode = next_fp_mode;