i965/vec4: load dvec3/4 uniforms first in the push constant buffer
Reorder the uniforms to load first the dvec4-aligned variables in the push constant buffer and then push the vec4-aligned ones. It takes into account that the relocated uniforms should be aligned to their channel size. This fixes a bug were the dvec3/4 might be loaded one part on a GRF and the rest in next GRF, so the region parameters to read that could break the HW rules. v2: - Fix broken logic. - Add a comment to explain what should be needed to optimise the usage of the push constant buffer slots, as this patch does not pack the uniforms. v3: - Implemented the push constant buffer usage optimization. Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> Cc: "17.1" <mesa-stable@lists.freedesktop.org> Acked-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
@@ -583,16 +583,46 @@ vec4_visitor::split_uniform_registers()
|
||||
}
|
||||
}
|
||||
|
||||
/* This function returns the register number where we placed the uniform */
|
||||
static int
|
||||
set_push_constant_loc(const int nr_uniforms, int *new_uniform_count,
|
||||
const int src, const int size, const int channel_size,
|
||||
int *new_loc, int *new_chan,
|
||||
int *new_chans_used)
|
||||
{
|
||||
int dst;
|
||||
/* Find the lowest place we can slot this uniform in. */
|
||||
for (dst = 0; dst < nr_uniforms; dst++) {
|
||||
if (ALIGN(new_chans_used[dst], channel_size) + size <= 4)
|
||||
break;
|
||||
}
|
||||
|
||||
assert(dst < nr_uniforms);
|
||||
|
||||
new_loc[src] = dst;
|
||||
new_chan[src] = ALIGN(new_chans_used[dst], channel_size);
|
||||
new_chans_used[dst] = ALIGN(new_chans_used[dst], channel_size) + size;
|
||||
|
||||
*new_uniform_count = MAX2(*new_uniform_count, dst + 1);
|
||||
return dst;
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::pack_uniform_registers()
|
||||
{
|
||||
uint8_t chans_used[this->uniforms];
|
||||
int new_loc[this->uniforms];
|
||||
int new_chan[this->uniforms];
|
||||
bool is_aligned_to_dvec4[this->uniforms];
|
||||
int new_chans_used[this->uniforms];
|
||||
int channel_sizes[this->uniforms];
|
||||
|
||||
memset(chans_used, 0, sizeof(chans_used));
|
||||
memset(new_loc, 0, sizeof(new_loc));
|
||||
memset(new_chan, 0, sizeof(new_chan));
|
||||
memset(new_chans_used, 0, sizeof(new_chans_used));
|
||||
memset(is_aligned_to_dvec4, 0, sizeof(is_aligned_to_dvec4));
|
||||
memset(channel_sizes, 0, sizeof(channel_sizes));
|
||||
|
||||
/* Find which uniform vectors are actually used by the program. We
|
||||
* expect unused vector elements when we've moved array access out
|
||||
@@ -622,7 +652,7 @@ vec4_visitor::pack_uniform_registers()
|
||||
continue;
|
||||
|
||||
assert(type_sz(inst->src[i].type) % 4 == 0);
|
||||
unsigned channel_size = type_sz(inst->src[i].type) / 4;
|
||||
int channel_size = type_sz(inst->src[i].type) / 4;
|
||||
|
||||
int reg = inst->src[i].nr;
|
||||
for (int c = 0; c < 4; c++) {
|
||||
@@ -631,10 +661,15 @@ vec4_visitor::pack_uniform_registers()
|
||||
|
||||
unsigned channel = BRW_GET_SWZ(inst->src[i].swizzle, c) + 1;
|
||||
unsigned used = MAX2(chans_used[reg], channel * channel_size);
|
||||
if (used <= 4)
|
||||
if (used <= 4) {
|
||||
chans_used[reg] = used;
|
||||
else
|
||||
channel_sizes[reg] = MAX2(channel_sizes[reg], channel_size);
|
||||
} else {
|
||||
is_aligned_to_dvec4[reg] = true;
|
||||
is_aligned_to_dvec4[reg + 1] = true;
|
||||
chans_used[reg + 1] = used - 4;
|
||||
channel_sizes[reg + 1] = MAX2(channel_sizes[reg + 1], channel_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -659,42 +694,60 @@ vec4_visitor::pack_uniform_registers()
|
||||
|
||||
int new_uniform_count = 0;
|
||||
|
||||
/* As the uniforms are going to be reordered, take the data from a temporary
|
||||
* copy of the original param[].
|
||||
*/
|
||||
gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
|
||||
stage_prog_data->nr_params);
|
||||
memcpy(param, stage_prog_data->param,
|
||||
sizeof(gl_constant_value*) * stage_prog_data->nr_params);
|
||||
|
||||
/* Now, figure out a packing of the live uniform vectors into our
|
||||
* push constants.
|
||||
* push constants. Start with dvec{3,4} because they are aligned to
|
||||
* dvec4 size (2 vec4).
|
||||
*/
|
||||
for (int src = 0; src < uniforms; src++) {
|
||||
int size = chans_used[src];
|
||||
|
||||
if (size == 0)
|
||||
if (size == 0 || !is_aligned_to_dvec4[src])
|
||||
continue;
|
||||
|
||||
int dst;
|
||||
/* Find the lowest place we can slot this uniform in. */
|
||||
for (dst = 0; dst < src; dst++) {
|
||||
if (chans_used[dst] + size <= 4)
|
||||
break;
|
||||
}
|
||||
|
||||
if (src == dst) {
|
||||
new_loc[src] = dst;
|
||||
new_chan[src] = 0;
|
||||
} else {
|
||||
new_loc[src] = dst;
|
||||
new_chan[src] = chans_used[dst];
|
||||
|
||||
/* dvec3 are aligned to dvec4 size, apply the alignment of the size
|
||||
* to 4 to avoid moving last component of a dvec3 to the available
|
||||
* location at the end of a previous dvec3. These available locations
|
||||
* could be filled by smaller variables in next loop.
|
||||
*/
|
||||
size = ALIGN(size, 4);
|
||||
int dst = set_push_constant_loc(uniforms, &new_uniform_count,
|
||||
src, size, channel_sizes[src],
|
||||
new_loc, new_chan,
|
||||
new_chans_used);
|
||||
/* Move the references to the data */
|
||||
for (int j = 0; j < size; j++) {
|
||||
stage_prog_data->param[dst * 4 + new_chan[src] + j] =
|
||||
stage_prog_data->param[src * 4 + j];
|
||||
param[src * 4 + j];
|
||||
}
|
||||
}
|
||||
|
||||
chans_used[dst] += size;
|
||||
chans_used[src] = 0;
|
||||
}
|
||||
|
||||
new_uniform_count = MAX2(new_uniform_count, dst + 1);
|
||||
/* Continue with the rest of data, which is aligned to vec4. */
|
||||
for (int src = 0; src < uniforms; src++) {
|
||||
int size = chans_used[src];
|
||||
|
||||
if (size == 0 || is_aligned_to_dvec4[src])
|
||||
continue;
|
||||
|
||||
int dst = set_push_constant_loc(uniforms, &new_uniform_count,
|
||||
src, size, channel_sizes[src],
|
||||
new_loc, new_chan,
|
||||
new_chans_used);
|
||||
/* Move the references to the data */
|
||||
for (int j = 0; j < size; j++) {
|
||||
stage_prog_data->param[dst * 4 + new_chan[src] + j] =
|
||||
param[src * 4 + j];
|
||||
}
|
||||
}
|
||||
|
||||
ralloc_free(param);
|
||||
this->uniforms = new_uniform_count;
|
||||
|
||||
/* Now, update the instructions for our repacked uniforms. */
|
||||
@@ -705,9 +758,9 @@ vec4_visitor::pack_uniform_registers()
|
||||
if (inst->src[i].file != UNIFORM)
|
||||
continue;
|
||||
|
||||
int chan = new_chan[src] / channel_sizes[src];
|
||||
inst->src[i].nr = new_loc[src];
|
||||
inst->src[i].swizzle += BRW_SWIZZLE4(new_chan[src], new_chan[src],
|
||||
new_chan[src], new_chan[src]);
|
||||
inst->src[i].swizzle += BRW_SWIZZLE4(chan, chan, chan, chan);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user