From c9e667b7adeeaff4c71d0dbcf2a74d812dca11cf Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Mon, 6 Jan 2025 20:51:32 -0800 Subject: [PATCH] intel/elk: Remove uses of VLAs Was causing trouble in some build configurations, we don't really need them. Unless there's a good reason, defaults to use ralloc for consistency with the larger codebase. Reviewed-by: Kenneth Graunke Reviewed-by: Antonio Ospite Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/elk/elk_fs.cpp | 3 ++- .../compiler/elk/elk_fs_reg_allocate.cpp | 15 ++++++------- .../elk/elk_schedule_instructions.cpp | 4 +++- .../compiler/elk/elk_test_eu_validate.cpp | 8 +++++-- src/intel/compiler/elk/elk_vec4.cpp | 16 ++++++++------ .../elk/elk_vec4_copy_propagation.cpp | 10 ++++----- .../compiler/elk/elk_vec4_reg_allocate.cpp | 21 +++++++++++-------- src/intel/compiler/elk/elk_vec4_visitor.cpp | 6 ++++-- 8 files changed, 50 insertions(+), 33 deletions(-) diff --git a/src/intel/compiler/elk/elk_fs.cpp b/src/intel/compiler/elk/elk_fs.cpp index 8a931c3ac2d..9c27c87307b 100644 --- a/src/intel/compiler/elk/elk_fs.cpp +++ b/src/intel/compiler/elk/elk_fs.cpp @@ -1697,7 +1697,7 @@ elk_fs_visitor::split_virtual_grfs() /* Count the total number of registers */ unsigned reg_count = 0; - unsigned vgrf_to_reg[num_vars]; + unsigned *vgrf_to_reg = new unsigned[num_vars]; for (unsigned i = 0; i < num_vars; i++) { vgrf_to_reg[i] = reg_count; reg_count += alloc.sizes[i]; @@ -1866,6 +1866,7 @@ cleanup: delete[] vgrf_has_split; delete[] new_virtual_grf; delete[] new_reg_offset; + delete[] vgrf_to_reg; return progress; } diff --git a/src/intel/compiler/elk/elk_fs_reg_allocate.cpp b/src/intel/compiler/elk/elk_fs_reg_allocate.cpp index 86f812ed44c..ec122b9aaac 100644 --- a/src/intel/compiler/elk/elk_fs_reg_allocate.cpp +++ b/src/intel/compiler/elk/elk_fs_reg_allocate.cpp @@ -47,7 +47,7 @@ assign_reg(const struct intel_device_info *devinfo, void elk_fs_visitor::assign_regs_trivial() { - unsigned 
hw_reg_mapping[this->alloc.count + 1]; + unsigned *hw_reg_mapping = ralloc_array(NULL, unsigned, this->alloc.count + 1); unsigned i; int reg_width = dispatch_width / 8; @@ -74,6 +74,7 @@ elk_fs_visitor::assign_regs_trivial() this->alloc.count = this->grf_used; } + ralloc_free(hw_reg_mapping); } /** @@ -840,11 +841,7 @@ void elk_fs_reg_alloc::set_spill_costs() { float block_scale = 1.0; - float spill_costs[fs->alloc.count]; - - for (unsigned i = 0; i < fs->alloc.count; i++) { - spill_costs[i] = 0.0; - } + float *spill_costs = rzalloc_array(NULL, float, fs->alloc.count); /* Calculate costs for spilling nodes. Call it a cost of 1 per * spill/unspill we'll have to do, and guess that the insides of @@ -919,6 +916,8 @@ elk_fs_reg_alloc::set_spill_costs() } have_spill_costs = true; + + ralloc_free(spill_costs); } int @@ -1179,7 +1178,7 @@ elk_fs_reg_alloc::assign_regs(bool allow_spilling, bool spill_all) * regs in the register classes back down to real hardware reg * numbers. */ - unsigned hw_reg_mapping[fs->alloc.count]; + unsigned *hw_reg_mapping = ralloc_array(NULL, unsigned, fs->alloc.count); fs->grf_used = fs->first_non_payload_grf; for (unsigned i = 0; i < fs->alloc.count; i++) { int reg = ra_get_node_reg(g, first_vgrf_node + i); @@ -1199,6 +1198,8 @@ elk_fs_reg_alloc::assign_regs(bool allow_spilling, bool spill_all) fs->alloc.count = fs->grf_used; + ralloc_free(hw_reg_mapping); + return true; } diff --git a/src/intel/compiler/elk/elk_schedule_instructions.cpp b/src/intel/compiler/elk/elk_schedule_instructions.cpp index e1e514f583c..e6f19858e4d 100644 --- a/src/intel/compiler/elk/elk_schedule_instructions.cpp +++ b/src/intel/compiler/elk/elk_schedule_instructions.cpp @@ -882,7 +882,7 @@ elk_fs_instruction_scheduler::setup_liveness(elk_cfg_t *cfg) } } - int payload_last_use_ip[hw_reg_count]; + int *payload_last_use_ip = ralloc_array(NULL, int, hw_reg_count); v->calculate_payload_ranges(hw_reg_count, payload_last_use_ip); for (unsigned i = 0; i < hw_reg_count; i++) { 
@@ -897,6 +897,8 @@ elk_fs_instruction_scheduler::setup_liveness(elk_cfg_t *cfg) BITSET_SET(hw_liveout[block], i); } } + + ralloc_free(payload_last_use_ip); } void diff --git a/src/intel/compiler/elk/elk_test_eu_validate.cpp b/src/intel/compiler/elk/elk_test_eu_validate.cpp index 404a02608fa..33ae6938182 100644 --- a/src/intel/compiler/elk/elk_test_eu_validate.cpp +++ b/src/intel/compiler/elk/elk_test_eu_validate.cpp @@ -269,7 +269,9 @@ TEST_P(validation_test, invalid_type_encoding) * instructions, so keep a record in a bitset the invalid patterns so * they can be verified to be invalid when used. */ - BITSET_DECLARE(invalid_encodings, num_encodings); + const int max_bits = 4; + assert(max_bits >= num_bits); + BITSET_DECLARE(invalid_encodings, 1 << max_bits); const struct { enum elk_reg_type type; @@ -377,7 +379,9 @@ TEST_P(validation_test, invalid_type_encoding_3src_a16) * instructions, so keep a record in a bitset the invalid patterns so * they can be verified to be invalid when used. */ - BITSET_DECLARE(invalid_encodings, num_encodings); + const int max_bits = 3; + assert(max_bits >= num_bits); + BITSET_DECLARE(invalid_encodings, 1 << max_bits); const struct { enum elk_reg_type type; diff --git a/src/intel/compiler/elk/elk_vec4.cpp b/src/intel/compiler/elk/elk_vec4.cpp index 1d5492f35f3..37cf445a7f2 100644 --- a/src/intel/compiler/elk/elk_vec4.cpp +++ b/src/intel/compiler/elk/elk_vec4.cpp @@ -1264,10 +1264,8 @@ void vec4_visitor::split_virtual_grfs() { int num_vars = this->alloc.count; - int new_virtual_grf[num_vars]; - bool split_grf[num_vars]; - - memset(new_virtual_grf, 0, sizeof(new_virtual_grf)); + int *new_virtual_grf = rzalloc_array(NULL, int, num_vars); + bool *split_grf = ralloc_array(NULL, bool, num_vars); /* Try to split anything > 0 sized. 
*/ for (int i = 0; i < num_vars; i++) { @@ -1319,6 +1317,10 @@ vec4_visitor::split_virtual_grfs() } } } + + ralloc_free(new_virtual_grf); + ralloc_free(split_grf); + invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL | DEPENDENCY_VARIABLES); } @@ -2490,14 +2492,16 @@ vec4_visitor::run() if (INTEL_DEBUG(DEBUG_SPILL_VEC4)) { /* Debug of register spilling: Go spill everything. */ const int grf_count = alloc.count; - float spill_costs[alloc.count]; - bool no_spill[alloc.count]; + float *spill_costs = ralloc_array(NULL, float, alloc.count); + bool *no_spill = ralloc_array(NULL, bool, alloc.count); evaluate_spill_costs(spill_costs, no_spill); for (int i = 0; i < grf_count; i++) { if (no_spill[i]) continue; spill_reg(i); } + ralloc_free(spill_costs); + ralloc_free(no_spill); /* We want to run this after spilling because 64-bit (un)spills need to * emit code to shuffle 64-bit data for the 32-bit scratch read/write diff --git a/src/intel/compiler/elk/elk_vec4_copy_propagation.cpp b/src/intel/compiler/elk/elk_vec4_copy_propagation.cpp index 5b3b785f2a0..7aae45c2820 100644 --- a/src/intel/compiler/elk/elk_vec4_copy_propagation.cpp +++ b/src/intel/compiler/elk/elk_vec4_copy_propagation.cpp @@ -465,9 +465,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) const int attributes_per_reg = prog_data->dispatch_mode == INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2; bool progress = false; - struct copy_entry entries[alloc.total_size]; - - memset(&entries, 0, sizeof(entries)); + struct copy_entry *entries = rzalloc_array(NULL, copy_entry, alloc.total_size); foreach_block_and_inst(block, vec4_instruction, inst, cfg) { /* This pass only works on basic blocks. If there's flow @@ -478,7 +476,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) * src/glsl/opt_copy_propagation.cpp to track available copies. 
*/ if (!is_dominated_by_previous_instruction(inst)) { - memset(&entries, 0, sizeof(entries)); + memset(entries, 0, sizeof(copy_entry) * alloc.total_size); continue; } @@ -532,7 +530,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) * our destination's updated channels, as the two are no longer equal. */ if (inst->dst.reladdr) - memset(&entries, 0, sizeof(entries)); + memset(entries, 0, sizeof(copy_entry) * alloc.total_size); else { for (unsigned i = 0; i < alloc.total_size; i++) { for (int j = 0; j < 4; j++) { @@ -546,6 +544,8 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) } } + ralloc_free(entries); + if (progress) invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW | DEPENDENCY_INSTRUCTION_DETAIL); diff --git a/src/intel/compiler/elk/elk_vec4_reg_allocate.cpp b/src/intel/compiler/elk/elk_vec4_reg_allocate.cpp index 47cae724de9..7f8d9106eb7 100644 --- a/src/intel/compiler/elk/elk_vec4_reg_allocate.cpp +++ b/src/intel/compiler/elk/elk_vec4_reg_allocate.cpp @@ -43,17 +43,13 @@ assign(unsigned int *reg_hw_locations, elk_backend_reg *reg) bool vec4_visitor::reg_allocate_trivial() { - unsigned int hw_reg_mapping[this->alloc.count]; - bool virtual_grf_used[this->alloc.count]; + unsigned int *hw_reg_mapping = ralloc_array(NULL, unsigned, this->alloc.count); + bool *virtual_grf_used = rzalloc_array(NULL, bool, this->alloc.count); int next; /* Calculate which virtual GRFs are actually in use after whatever * optimization passes have occurred.
*/ - for (unsigned i = 0; i < this->alloc.count; i++) { - virtual_grf_used[i] = false; - } - foreach_block_and_inst(block, vec4_instruction, inst, cfg) { if (inst->dst.file == VGRF) virtual_grf_used[inst->dst.nr] = true; @@ -81,6 +77,9 @@ vec4_visitor::reg_allocate_trivial() assign(hw_reg_mapping, &inst->src[2]); } + ralloc_free(virtual_grf_used); + ralloc_free(hw_reg_mapping); + if (prog_data->total_grf > max_grf) { fail("Ran out of regs on trivial allocator (%d/%d)\n", prog_data->total_grf, max_grf); @@ -158,7 +157,6 @@ vec4_visitor::setup_payload_interference(struct ra_graph *g, bool vec4_visitor::reg_allocate() { - unsigned int hw_reg_mapping[alloc.count]; int payload_reg_count = this->first_non_payload_grf; /* Using the trivial allocator can be useful in debugging undefined @@ -176,6 +174,8 @@ vec4_visitor::reg_allocate() struct ra_graph *g = ra_alloc_interference_graph(compiler->vec4_reg_set.regs, node_count); + unsigned int *hw_reg_mapping = ralloc_array(g, unsigned, alloc.count); + for (unsigned i = 0; i < alloc.count; i++) { int size = this->alloc.sizes[i]; assert(size >= 1 && size <= MAX_VGRF_SIZE(devinfo)); @@ -454,8 +454,8 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) int vec4_visitor::choose_spill_reg(struct ra_graph *g) { - float spill_costs[this->alloc.count]; - bool no_spill[this->alloc.count]; + float *spill_costs = ralloc_array(NULL, float, this->alloc.count); + bool *no_spill = ralloc_array(NULL, bool, this->alloc.count); evaluate_spill_costs(spill_costs, no_spill); @@ -464,6 +464,9 @@ vec4_visitor::choose_spill_reg(struct ra_graph *g) ra_set_node_spill_cost(g, i, spill_costs[i]); } + ralloc_free(spill_costs); + ralloc_free(no_spill); + return ra_get_best_spill_node(g); } diff --git a/src/intel/compiler/elk/elk_vec4_visitor.cpp b/src/intel/compiler/elk/elk_vec4_visitor.cpp index c361c5de6fa..6ce17f0d73d 100644 --- a/src/intel/compiler/elk/elk_vec4_visitor.cpp +++ b/src/intel/compiler/elk/elk_vec4_visitor.cpp @@ -1264,8 
+1264,8 @@ vec4_visitor::emit_resolve_reladdr(int scratch_loc[], elk_bblock_t *block, void vec4_visitor::move_grf_array_access_to_scratch() { - int scratch_loc[this->alloc.count]; - memset(scratch_loc, -1, sizeof(scratch_loc)); + int *scratch_loc = ralloc_array(NULL, int, this->alloc.count); + memset(scratch_loc, -1, sizeof(int) * this->alloc.count); /* First, calculate the set of virtual GRFs that need to be punted * to scratch due to having any array access on them, and where in @@ -1333,6 +1333,8 @@ vec4_visitor::move_grf_array_access_to_scratch() inst->src[i]); } } + + ralloc_free(scratch_loc); } void