diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index b7a1ffc233d..cb77c691cb9 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -799,6 +799,19 @@ fs_inst::is_raw_move() const brw_type_size_bits(src[0].type) == brw_type_size_bits(dst.type)); } +bool +fs_inst::uses_address_register_implicitly() const +{ + switch (opcode) { + case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_SHUFFLE: + case SHADER_OPCODE_MOV_INDIRECT: + return true; + default: + return false; + } +} + /* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch. * This brings in those uniform definitions */ diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 739fc827f90..8d66055bc59 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -448,6 +448,8 @@ public: /* The API selected subgroup size */ unsigned api_subgroup_size; /**< 0, 8, 16, 32 */ + unsigned next_address_register_nr; + struct brw_shader_stats shader_stats; void debug_optimizer(const nir_shader *nir, diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 638f1ac9145..c1820d8989a 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -212,6 +212,14 @@ namespace brw { return retype(null_reg_ud(), type); } + brw_reg + vaddr(enum brw_reg_type type, unsigned subnr) const + { + brw_reg addr = brw_address_reg(subnr); + addr.nr = shader->next_address_register_nr++; + return retype(addr, type); + } + /** * Create a null register of floating type. */ diff --git a/src/intel/compiler/brw_fs_validate.cpp b/src/intel/compiler/brw_fs_validate.cpp index d80532fc4af..4796edfa449 100644 --- a/src/intel/compiler/brw_fs_validate.cpp +++ b/src/intel/compiler/brw_fs_validate.cpp @@ -281,6 +281,16 @@ brw_fs_validate(const fs_visitor &s) s.cfg->validate(_mesa_shader_stage_to_abbrev(s.stage)); foreach_block(block, s.cfg) { + /* Track the last used address register. Usage of the address register + * in the IR should be limited to within a block, otherwise we would + * unable to schedule some instructions without spilling the address + * register to a VGRF. + * + * Another pattern we stick to when using the address register in the IR + * is that we write and read the register in pairs of instruction. + */ + uint32_t last_used_address_register[16] = {}; + foreach_inst_in_block (fs_inst, inst, block) { brw_validate_instruction_phase(s, inst); @@ -392,15 +402,24 @@ brw_fs_validate(const fs_visitor &s) if (inst->dst.file == VGRF) { fsv_assert_lte(inst->dst.offset / REG_SIZE + regs_written(inst), s.alloc.sizes[inst->dst.nr]); - if (inst->exec_size > 1) fsv_assert_ne(inst->dst.stride, 0); + } else if (inst->dst.is_address()) { + fsv_assert(inst->dst.nr != 0); } + bool read_address_reg = false; for (unsigned i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(devinfo, inst, i), s.alloc.sizes[inst->src[i].nr]); + } else if (inst->src[i].is_address()) { + fsv_assert(inst->src[i].nr != 0); + for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2) { + fsv_assert_eq(inst->src[i].nr, + last_used_address_register[inst->src[i].address_slot(hw)]); + } + read_address_reg = true; } } @@ -516,6 +535,30 @@ brw_fs_validate(const fs_visitor &s) inst->src[i].type != BRW_TYPE_HF); } } + + /* Update the last used address register. */ + if (read_address_reg) { + /* When an instruction only reads the address register, we assume + * the read parts are never going to be used again. + */ + for (unsigned i = 0; i < inst->sources; i++) { + if (!inst->src[i].is_address()) + continue; + for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2) + last_used_address_register[inst->src[i].address_slot(hw)] = 0; + } + } + if (inst->dst.is_address()) { + /* For the written part of the address register */ + for (unsigned hw = 0; hw < inst->size_written; hw += 2) + last_used_address_register[inst->dst.address_slot(hw)] = inst->dst.nr; + } else if (inst->uses_address_register_implicitly()) { + /* If the instruction is making use of the address register, + * discard the entire thing. + */ + memset(last_used_address_register, 0, + sizeof(last_used_address_register)); + } } } } diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 907203555e3..b055c9fc031 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -471,6 +471,8 @@ fs_visitor::init() this->spilled_any_registers = false; this->phase = BRW_SHADER_PHASE_INITIAL; + + this->next_address_register_nr = 1; } fs_visitor::~fs_visitor() diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 27dbecf92dd..f9564886625 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -122,6 +122,12 @@ public: */ bool has_sampler_residency() const; + /** + * Return true if this instruction is using the address register + * implicitly. + */ + bool uses_address_register_implicitly() const; + uint8_t sources; /**< Number of brw_reg sources. */ /** diff --git a/src/intel/compiler/brw_print.cpp b/src/intel/compiler/brw_print.cpp index c008a4b94f1..dea47d75628 100644 --- a/src/intel/compiler/brw_print.cpp +++ b/src/intel/compiler/brw_print.cpp @@ -434,7 +434,10 @@ brw_print_instruction(const fs_visitor &s, const fs_inst *inst, FILE *file, cons fprintf(file, "***attr%d***", inst->dst.nr); break; case ADDRESS: - fprintf(file, "a0.%d", inst->dst.subnr); + if (inst->dst.nr == 0) + fprintf(file, "a0.%d", inst->dst.subnr); + else + fprintf(file, "va%u.%d", inst->dst.nr, inst->dst.subnr); break; case ARF: switch (inst->dst.nr & 0xF0) { @@ -498,7 +501,10 @@ brw_print_instruction(const fs_visitor &s, const fs_inst *inst, FILE *file, cons fprintf(file, "g%d", inst->src[i].nr); break; case ADDRESS: - fprintf(file, "a0.%d", inst->src[i].subnr); + if (inst->src[i].nr == 0) + fprintf(file, "a0.%d", inst->src[i].subnr); + else + fprintf(file, "va%u.%d", inst->src[i].nr, inst->src[i].subnr); break; case ATTR: fprintf(file, "attr%d", inst->src[i].nr); diff --git a/src/intel/compiler/brw_reg.cpp b/src/intel/compiler/brw_reg.cpp index ab119dbcfaa..aab993ba296 100644 --- a/src/intel/compiler/brw_reg.cpp +++ b/src/intel/compiler/brw_reg.cpp @@ -263,6 +263,19 @@ brw_reg::is_accumulator() const return file == ARF && (nr & 0xF0) == BRW_ARF_ACCUMULATOR; } +bool +brw_reg::is_address() const +{ + return file == ADDRESS; +} + +unsigned +brw_reg::address_slot(unsigned byte_offset) const +{ + assert(is_address()); + return (reg_offset(*this) + byte_offset) / 2; +} + bool brw_reg::equals(const brw_reg &r) const { diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h index 144f2554195..2a5de328419 100644 --- a/src/intel/compiler/brw_reg.h +++ b/src/intel/compiler/brw_reg.h @@ -226,6 +226,9 @@ typedef struct brw_reg { bool is_negative_one() const; bool is_null() const; bool is_accumulator() const; + bool is_address() const; + + unsigned address_slot(unsigned byte_offset) const; /** * Return the size in bytes of a single logical component of the diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index d8449e09906..64d2e25f633 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -323,4 +323,3 @@ fs_inst::remove(bblock_t *block, bool defer_later_block_ip_updates) exec_node::remove(); } -