brw: add infra to make use of the address register in the IR

This limits the address register to simple cases inside a block.

Validation ensures that the address register is only written once and
read once.

Instruction scheduling makes sure that instructions using the address
register in the generator are not scheduled while there is a use of
the register in the IR.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28199>
This commit is contained in:
Lionel Landwerlin
2024-03-13 11:01:16 +02:00
committed by Marge Bot
parent c9fa235c28
commit 0a5bdf1199
10 changed files with 99 additions and 4 deletions

View File

@@ -799,6 +799,19 @@ fs_inst::is_raw_move() const
brw_type_size_bits(src[0].type) == brw_type_size_bits(dst.type));
}
/**
 * Report whether this instruction uses the address register without it
 * appearing as an explicit operand.
 *
 * Only the BROADCAST, SHUFFLE and MOV_INDIRECT shader opcodes are treated
 * this way; every other opcode yields false.
 */
bool
fs_inst::uses_address_register_implicitly() const
{
   return opcode == SHADER_OPCODE_BROADCAST ||
          opcode == SHADER_OPCODE_SHUFFLE ||
          opcode == SHADER_OPCODE_MOV_INDIRECT;
}
/* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
* This brings in those uniform definitions
*/

View File

@@ -448,6 +448,8 @@ public:
/* The API selected subgroup size */
unsigned api_subgroup_size; /**< 0, 8, 16, 32 */
unsigned next_address_register_nr;
struct brw_shader_stats shader_stats;
void debug_optimizer(const nir_shader *nir,

View File

@@ -212,6 +212,14 @@ namespace brw {
return retype(null_reg_ud(), type);
}
/**
 * Create a fresh virtual address register.
 *
 * The register starts out as brw_address_reg(subnr), is then tagged with
 * the shader's next address register number (the counter is
 * post-incremented so each call hands out a distinct number), and is
 * finally retyped to \p type.
 */
brw_reg
vaddr(enum brw_reg_type type, unsigned subnr) const
{
   brw_reg reg = brw_address_reg(subnr);
   const unsigned virtual_nr = shader->next_address_register_nr++;

   reg.nr = virtual_nr;
   return retype(reg, type);
}
/**
* Create a null register of floating type.
*/

View File

@@ -281,6 +281,16 @@ brw_fs_validate(const fs_visitor &s)
s.cfg->validate(_mesa_shader_stage_to_abbrev(s.stage));
foreach_block(block, s.cfg) {
/* Track the last used address register. Usage of the address register
* in the IR should be limited to within a block, otherwise we would be
* unable to schedule some instructions without spilling the address
* register to a VGRF.
*
* Another pattern we stick to when using the address register in the IR
* is that we write and read the register in pairs of instructions.
*/
uint32_t last_used_address_register[16] = {};
foreach_inst_in_block (fs_inst, inst, block) {
brw_validate_instruction_phase(s, inst);
@@ -392,15 +402,24 @@ brw_fs_validate(const fs_visitor &s)
if (inst->dst.file == VGRF) {
fsv_assert_lte(inst->dst.offset / REG_SIZE + regs_written(inst),
s.alloc.sizes[inst->dst.nr]);
if (inst->exec_size > 1)
fsv_assert_ne(inst->dst.stride, 0);
} else if (inst->dst.is_address()) {
fsv_assert(inst->dst.nr != 0);
}
bool read_address_reg = false;
for (unsigned i = 0; i < inst->sources; i++) {
if (inst->src[i].file == VGRF) {
fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(devinfo, inst, i),
s.alloc.sizes[inst->src[i].nr]);
} else if (inst->src[i].is_address()) {
fsv_assert(inst->src[i].nr != 0);
for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2) {
fsv_assert_eq(inst->src[i].nr,
last_used_address_register[inst->src[i].address_slot(hw)]);
}
read_address_reg = true;
}
}
@@ -516,6 +535,30 @@ brw_fs_validate(const fs_visitor &s)
inst->src[i].type != BRW_TYPE_HF);
}
}
/* Update the last used address register. */
if (read_address_reg) {
/* When an instruction only reads the address register, we assume
* the read parts are never going to be used again.
*/
for (unsigned i = 0; i < inst->sources; i++) {
if (!inst->src[i].is_address())
continue;
for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2)
last_used_address_register[inst->src[i].address_slot(hw)] = 0;
}
}
if (inst->dst.is_address()) {
/* For the written part of the address register */
for (unsigned hw = 0; hw < inst->size_written; hw += 2)
last_used_address_register[inst->dst.address_slot(hw)] = inst->dst.nr;
} else if (inst->uses_address_register_implicitly()) {
/* If the instruction is making use of the address register,
* discard the entire thing.
*/
memset(last_used_address_register, 0,
sizeof(last_used_address_register));
}
}
}
}

View File

@@ -471,6 +471,8 @@ fs_visitor::init()
this->spilled_any_registers = false;
this->phase = BRW_SHADER_PHASE_INITIAL;
this->next_address_register_nr = 1;
}
fs_visitor::~fs_visitor()

View File

@@ -122,6 +122,12 @@ public:
*/
bool has_sampler_residency() const;
/**
* Return true if this instruction is using the address register
* implicitly.
*/
bool uses_address_register_implicitly() const;
uint8_t sources; /**< Number of brw_reg sources. */
/**

View File

@@ -434,7 +434,10 @@ brw_print_instruction(const fs_visitor &s, const fs_inst *inst, FILE *file, cons
fprintf(file, "***attr%d***", inst->dst.nr);
break;
case ADDRESS:
fprintf(file, "a0.%d", inst->dst.subnr);
if (inst->dst.nr == 0)
fprintf(file, "a0.%d", inst->dst.subnr);
else
fprintf(file, "va%u.%d", inst->dst.nr, inst->dst.subnr);
break;
case ARF:
switch (inst->dst.nr & 0xF0) {
@@ -498,7 +501,10 @@ brw_print_instruction(const fs_visitor &s, const fs_inst *inst, FILE *file, cons
fprintf(file, "g%d", inst->src[i].nr);
break;
case ADDRESS:
fprintf(file, "a0.%d", inst->src[i].subnr);
if (inst->src[i].nr == 0)
fprintf(file, "a0.%d", inst->src[i].subnr);
else
fprintf(file, "va%u.%d", inst->src[i].nr, inst->src[i].subnr);
break;
case ATTR:
fprintf(file, "attr%d", inst->src[i].nr);

View File

@@ -263,6 +263,19 @@ brw_reg::is_accumulator() const
return file == ARF && (nr & 0xF0) == BRW_ARF_ACCUMULATOR;
}
/**
 * Return true when this register lives in the ADDRESS register file.
 */
bool
brw_reg::is_address() const
{
   const bool in_address_file = (file == ADDRESS);
   return in_address_file;
}
/**
 * Map a byte offset within this address register to a slot index.
 *
 * The offset of the register itself (reg_offset()) is added to
 * \p byte_offset and the sum is divided by two, i.e. slots are counted in
 * 2-byte units. Must only be called on a register for which is_address()
 * holds.
 */
unsigned
brw_reg::address_slot(unsigned byte_offset) const
{
   assert(is_address());

   const unsigned absolute_byte = reg_offset(*this) + byte_offset;
   return absolute_byte / 2;
}
bool
brw_reg::equals(const brw_reg &r) const
{

View File

@@ -226,6 +226,9 @@ typedef struct brw_reg {
bool is_negative_one() const;
bool is_null() const;
bool is_accumulator() const;
bool is_address() const;
unsigned address_slot(unsigned byte_offset) const;
/**
* Return the size in bytes of a single logical component of the

View File

@@ -323,4 +323,3 @@ fs_inst::remove(bblock_t *block, bool defer_later_block_ip_updates)
exec_node::remove();
}