brw: add infra to make use of the address register in the IR
This limits the address register to simple cases inside a block. Validation ensures that the address register is only written once and read once. Instruction scheduling makes sure that instructions using the address register in the generator are not scheduled while there is an usage of the register in the IR. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28199>
This commit is contained in:

committed by
Marge Bot

parent
c9fa235c28
commit
0a5bdf1199
@@ -799,6 +799,19 @@ fs_inst::is_raw_move() const
|
||||
brw_type_size_bits(src[0].type) == brw_type_size_bits(dst.type));
|
||||
}
|
||||
|
||||
bool
|
||||
fs_inst::uses_address_register_implicitly() const
|
||||
{
|
||||
switch (opcode) {
|
||||
case SHADER_OPCODE_BROADCAST:
|
||||
case SHADER_OPCODE_SHUFFLE:
|
||||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
|
||||
* This brings in those uniform definitions
|
||||
*/
|
||||
|
@@ -448,6 +448,8 @@ public:
|
||||
/* The API selected subgroup size */
|
||||
unsigned api_subgroup_size; /**< 0, 8, 16, 32 */
|
||||
|
||||
unsigned next_address_register_nr;
|
||||
|
||||
struct brw_shader_stats shader_stats;
|
||||
|
||||
void debug_optimizer(const nir_shader *nir,
|
||||
|
@@ -212,6 +212,14 @@ namespace brw {
|
||||
return retype(null_reg_ud(), type);
|
||||
}
|
||||
|
||||
brw_reg
|
||||
vaddr(enum brw_reg_type type, unsigned subnr) const
|
||||
{
|
||||
brw_reg addr = brw_address_reg(subnr);
|
||||
addr.nr = shader->next_address_register_nr++;
|
||||
return retype(addr, type);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a null register of floating type.
|
||||
*/
|
||||
|
@@ -281,6 +281,16 @@ brw_fs_validate(const fs_visitor &s)
|
||||
s.cfg->validate(_mesa_shader_stage_to_abbrev(s.stage));
|
||||
|
||||
foreach_block(block, s.cfg) {
|
||||
/* Track the last used address register. Usage of the address register
|
||||
* in the IR should be limited to within a block, otherwise we would
|
||||
* unable to schedule some instructions without spilling the address
|
||||
* register to a VGRF.
|
||||
*
|
||||
* Another pattern we stick to when using the address register in the IR
|
||||
* is that we write and read the register in pairs of instruction.
|
||||
*/
|
||||
uint32_t last_used_address_register[16] = {};
|
||||
|
||||
foreach_inst_in_block (fs_inst, inst, block) {
|
||||
brw_validate_instruction_phase(s, inst);
|
||||
|
||||
@@ -392,15 +402,24 @@ brw_fs_validate(const fs_visitor &s)
|
||||
if (inst->dst.file == VGRF) {
|
||||
fsv_assert_lte(inst->dst.offset / REG_SIZE + regs_written(inst),
|
||||
s.alloc.sizes[inst->dst.nr]);
|
||||
|
||||
if (inst->exec_size > 1)
|
||||
fsv_assert_ne(inst->dst.stride, 0);
|
||||
} else if (inst->dst.is_address()) {
|
||||
fsv_assert(inst->dst.nr != 0);
|
||||
}
|
||||
|
||||
bool read_address_reg = false;
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(devinfo, inst, i),
|
||||
s.alloc.sizes[inst->src[i].nr]);
|
||||
} else if (inst->src[i].is_address()) {
|
||||
fsv_assert(inst->src[i].nr != 0);
|
||||
for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2) {
|
||||
fsv_assert_eq(inst->src[i].nr,
|
||||
last_used_address_register[inst->src[i].address_slot(hw)]);
|
||||
}
|
||||
read_address_reg = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -516,6 +535,30 @@ brw_fs_validate(const fs_visitor &s)
|
||||
inst->src[i].type != BRW_TYPE_HF);
|
||||
}
|
||||
}
|
||||
|
||||
/* Update the last used address register. */
|
||||
if (read_address_reg) {
|
||||
/* When an instruction only reads the address register, we assume
|
||||
* the read parts are never going to be used again.
|
||||
*/
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (!inst->src[i].is_address())
|
||||
continue;
|
||||
for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2)
|
||||
last_used_address_register[inst->src[i].address_slot(hw)] = 0;
|
||||
}
|
||||
}
|
||||
if (inst->dst.is_address()) {
|
||||
/* For the written part of the address register */
|
||||
for (unsigned hw = 0; hw < inst->size_written; hw += 2)
|
||||
last_used_address_register[inst->dst.address_slot(hw)] = inst->dst.nr;
|
||||
} else if (inst->uses_address_register_implicitly()) {
|
||||
/* If the instruction is making use of the address register,
|
||||
* discard the entire thing.
|
||||
*/
|
||||
memset(last_used_address_register, 0,
|
||||
sizeof(last_used_address_register));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -471,6 +471,8 @@ fs_visitor::init()
|
||||
this->spilled_any_registers = false;
|
||||
|
||||
this->phase = BRW_SHADER_PHASE_INITIAL;
|
||||
|
||||
this->next_address_register_nr = 1;
|
||||
}
|
||||
|
||||
fs_visitor::~fs_visitor()
|
||||
|
@@ -122,6 +122,12 @@ public:
|
||||
*/
|
||||
bool has_sampler_residency() const;
|
||||
|
||||
/**
|
||||
* Return true if this instruction is using the address register
|
||||
* implicitly.
|
||||
*/
|
||||
bool uses_address_register_implicitly() const;
|
||||
|
||||
uint8_t sources; /**< Number of brw_reg sources. */
|
||||
|
||||
/**
|
||||
|
@@ -434,7 +434,10 @@ brw_print_instruction(const fs_visitor &s, const fs_inst *inst, FILE *file, cons
|
||||
fprintf(file, "***attr%d***", inst->dst.nr);
|
||||
break;
|
||||
case ADDRESS:
|
||||
fprintf(file, "a0.%d", inst->dst.subnr);
|
||||
if (inst->dst.nr == 0)
|
||||
fprintf(file, "a0.%d", inst->dst.subnr);
|
||||
else
|
||||
fprintf(file, "va%u.%d", inst->dst.nr, inst->dst.subnr);
|
||||
break;
|
||||
case ARF:
|
||||
switch (inst->dst.nr & 0xF0) {
|
||||
@@ -498,7 +501,10 @@ brw_print_instruction(const fs_visitor &s, const fs_inst *inst, FILE *file, cons
|
||||
fprintf(file, "g%d", inst->src[i].nr);
|
||||
break;
|
||||
case ADDRESS:
|
||||
fprintf(file, "a0.%d", inst->src[i].subnr);
|
||||
if (inst->src[i].nr == 0)
|
||||
fprintf(file, "a0.%d", inst->src[i].subnr);
|
||||
else
|
||||
fprintf(file, "va%u.%d", inst->src[i].nr, inst->src[i].subnr);
|
||||
break;
|
||||
case ATTR:
|
||||
fprintf(file, "attr%d", inst->src[i].nr);
|
||||
|
@@ -263,6 +263,19 @@ brw_reg::is_accumulator() const
|
||||
return file == ARF && (nr & 0xF0) == BRW_ARF_ACCUMULATOR;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_reg::is_address() const
|
||||
{
|
||||
return file == ADDRESS;
|
||||
}
|
||||
|
||||
unsigned
|
||||
brw_reg::address_slot(unsigned byte_offset) const
|
||||
{
|
||||
assert(is_address());
|
||||
return (reg_offset(*this) + byte_offset) / 2;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_reg::equals(const brw_reg &r) const
|
||||
{
|
||||
|
@@ -226,6 +226,9 @@ typedef struct brw_reg {
|
||||
bool is_negative_one() const;
|
||||
bool is_null() const;
|
||||
bool is_accumulator() const;
|
||||
bool is_address() const;
|
||||
|
||||
unsigned address_slot(unsigned byte_offset) const;
|
||||
|
||||
/**
|
||||
* Return the size in bytes of a single logical component of the
|
||||
|
@@ -323,4 +323,3 @@ fs_inst::remove(bblock_t *block, bool defer_later_block_ip_updates)
|
||||
|
||||
exec_node::remove();
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user