intel/brw: Add a src array for the common case in fs_inst

In the common case, fs_inst will have up to 4 sources (the HW
instructions have up to 3, and our representation of SENDs has 4).
Embed such an array into the fs_inst, and use it whenever applicable
instead of allocating a new array.

Also change the code to reuse the allocated src array when resizing to
a smaller length.

Between the changes above and the reduced amount of initializing
fs_regs, this reduces fossil-db time by around 2% for Borderlands 3
and Rise of the Tomb Raider, and around 1.5% for Total War Warhammer 3.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28379>
This commit is contained in:
Caio Oliveira
2024-03-21 15:42:44 -07:00
committed by Marge Bot
parent dae9795628
commit d9e737212d
2 changed files with 59 additions and 15 deletions

View File

@@ -47,19 +47,22 @@
using namespace brw;
static void
initialize_sources(fs_inst *inst, const fs_reg src[], uint8_t num_sources);
void
fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg *src, unsigned sources)
{
memset((void*)this, 0, sizeof(*this));
this->src = new fs_reg[MAX2(sources, 3)];
initialize_sources(this, src, sources);
for (unsigned i = 0; i < sources; i++)
this->src[i] = src[i];
this->opcode = opcode;
this->dst = dst;
this->sources = sources;
this->exec_size = exec_size;
assert(dst.file != IMM && dst.file != UNIFORM);
@@ -132,31 +135,71 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
/**
 * Copy constructor.
 *
 * All fields are first duplicated bitwise from \p that.  That bitwise copy
 * also duplicates the `src` pointer, which would leave both instructions
 * referring to the same source storage, so initialize_sources() is called
 * afterwards to re-point `src` at storage belonging to this instruction and
 * to copy the source registers into it.
 *
 * No array is allocated here directly: initialize_sources() is the only
 * place that decides where the sources live, so allocating beforehand would
 * just leak that allocation when `src` is overwritten.
 */
fs_inst::fs_inst(const fs_inst &that)
{
   memcpy((void*)this, &that, sizeof(that));

   initialize_sources(this, that.src, that.sources);
}
/**
 * Destructor.
 *
 * `src` either points at the embedded `builtin_src` member or at an array
 * obtained from `new[]`.  Only the latter may be handed to `delete[]`, and
 * only once, so the release is guarded by a comparison against the embedded
 * array.
 */
fs_inst::~fs_inst()
{
   if (this->src != this->builtin_src)
      delete[] this->src;
}
/**
 * Select the storage for the sources of \p inst and fill it from \p src.
 *
 * When \p num_sources fits in the instruction's embedded `builtin_src` array
 * that array is used; otherwise a new array of exactly \p num_sources
 * elements is allocated with `new[]`.  The first \p num_sources entries of
 * \p src are then copied in, and `inst->src` / `inst->sources` are updated to
 * describe the result.
 *
 * Whatever `inst->src` held before the call is overwritten without being
 * released.
 */
static void
initialize_sources(fs_inst *inst, const fs_reg src[], uint8_t num_sources)
{
   const bool fits_builtin = num_sources <= ARRAY_SIZE(inst->builtin_src);

   fs_reg *storage = fits_builtin ? inst->builtin_src
                                  : new fs_reg[num_sources];

   for (unsigned n = 0; n != num_sources; ++n)
      storage[n] = src[n];

   inst->src = storage;
   inst->sources = num_sources;
}
/**
 * Change the number of sources of this instruction to \p num_sources.
 *
 * Sources with an index below both the old and the new count are preserved.
 * Storage is chosen as follows:
 *
 *  - currently in `builtin_src`, new count still fits: keep using it;
 *  - currently in `builtin_src`, new count does not fit: allocate a heap
 *    array and copy the existing sources over;
 *  - currently on the heap, new count fits in `builtin_src`: move the
 *    surviving sources into `builtin_src` and free the heap array;
 *  - currently on the heap, shrinking but still too big for `builtin_src`:
 *    reuse the existing heap array as-is;
 *  - currently on the heap, growing: allocate a larger heap array, copy the
 *    existing sources over and free the old array.
 */
void
fs_inst::resize_sources(uint8_t num_sources)
{
   if (this->sources == num_sources)
      return;

   fs_reg *old_src = this->src;
   fs_reg *new_src;

   const unsigned builtin_size = ARRAY_SIZE(this->builtin_src);

   if (old_src == this->builtin_src) {
      if (num_sources > builtin_size) {
         new_src = new fs_reg[num_sources];
         for (unsigned i = 0; i < this->sources; i++)
            new_src[i] = old_src[i];
      } else {
         new_src = old_src;
      }
   } else {
      if (num_sources <= builtin_size) {
         new_src = this->builtin_src;
         assert(this->sources > num_sources);
         for (unsigned i = 0; i < num_sources; i++)
            new_src[i] = old_src[i];
      } else if (num_sources < this->sources) {
         new_src = old_src;
      } else {
         /* Growing: equality was handled by the early return, so
          * num_sources > this->sources here.  Only the first this->sources
          * entries of the old array are valid sources; copying num_sources
          * entries could read past the end of the old allocation.
          */
         new_src = new fs_reg[num_sources];
         for (unsigned i = 0; i < this->sources; i++)
            new_src[i] = old_src[i];
      }

      /* old_src is a heap array in this branch; release it unless it is
       * being reused.
       */
      if (old_src != new_src)
         delete[] old_src;
   }

   this->sources = num_sources;
   this->src = new_src;
}
void

View File

@@ -566,6 +566,7 @@ public:
fs_reg dst;
fs_reg *src;
fs_reg builtin_src[4];
};
/**