i965/vec4: load dvec3/4 uniforms first in the push constant buffer
Reorder the uniforms to load first the dvec4-aligned variables in the push constant buffer and then push the vec4-aligned ones. It takes into account that the relocated uniforms should be aligned to their channel size. This fixes a bug were the dvec3/4 might be loaded one part on a GRF and the rest in next GRF, so the region parameters to read that could break the HW rules. v2: - Fix broken logic. - Add a comment to explain what should be needed to optimise the usage of the push constant buffer slots, as this patch does not pack the uniforms. v3: - Implemented the push constant buffer usage optimization. Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> Cc: "17.1" <mesa-stable@lists.freedesktop.org> Acked-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
@@ -583,16 +583,46 @@ vec4_visitor::split_uniform_registers()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* This function returns the register number where we placed the uniform */
|
||||||
|
static int
|
||||||
|
set_push_constant_loc(const int nr_uniforms, int *new_uniform_count,
|
||||||
|
const int src, const int size, const int channel_size,
|
||||||
|
int *new_loc, int *new_chan,
|
||||||
|
int *new_chans_used)
|
||||||
|
{
|
||||||
|
int dst;
|
||||||
|
/* Find the lowest place we can slot this uniform in. */
|
||||||
|
for (dst = 0; dst < nr_uniforms; dst++) {
|
||||||
|
if (ALIGN(new_chans_used[dst], channel_size) + size <= 4)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(dst < nr_uniforms);
|
||||||
|
|
||||||
|
new_loc[src] = dst;
|
||||||
|
new_chan[src] = ALIGN(new_chans_used[dst], channel_size);
|
||||||
|
new_chans_used[dst] = ALIGN(new_chans_used[dst], channel_size) + size;
|
||||||
|
|
||||||
|
*new_uniform_count = MAX2(*new_uniform_count, dst + 1);
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
vec4_visitor::pack_uniform_registers()
|
vec4_visitor::pack_uniform_registers()
|
||||||
{
|
{
|
||||||
uint8_t chans_used[this->uniforms];
|
uint8_t chans_used[this->uniforms];
|
||||||
int new_loc[this->uniforms];
|
int new_loc[this->uniforms];
|
||||||
int new_chan[this->uniforms];
|
int new_chan[this->uniforms];
|
||||||
|
bool is_aligned_to_dvec4[this->uniforms];
|
||||||
|
int new_chans_used[this->uniforms];
|
||||||
|
int channel_sizes[this->uniforms];
|
||||||
|
|
||||||
memset(chans_used, 0, sizeof(chans_used));
|
memset(chans_used, 0, sizeof(chans_used));
|
||||||
memset(new_loc, 0, sizeof(new_loc));
|
memset(new_loc, 0, sizeof(new_loc));
|
||||||
memset(new_chan, 0, sizeof(new_chan));
|
memset(new_chan, 0, sizeof(new_chan));
|
||||||
|
memset(new_chans_used, 0, sizeof(new_chans_used));
|
||||||
|
memset(is_aligned_to_dvec4, 0, sizeof(is_aligned_to_dvec4));
|
||||||
|
memset(channel_sizes, 0, sizeof(channel_sizes));
|
||||||
|
|
||||||
/* Find which uniform vectors are actually used by the program. We
|
/* Find which uniform vectors are actually used by the program. We
|
||||||
* expect unused vector elements when we've moved array access out
|
* expect unused vector elements when we've moved array access out
|
||||||
@@ -622,7 +652,7 @@ vec4_visitor::pack_uniform_registers()
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
assert(type_sz(inst->src[i].type) % 4 == 0);
|
assert(type_sz(inst->src[i].type) % 4 == 0);
|
||||||
unsigned channel_size = type_sz(inst->src[i].type) / 4;
|
int channel_size = type_sz(inst->src[i].type) / 4;
|
||||||
|
|
||||||
int reg = inst->src[i].nr;
|
int reg = inst->src[i].nr;
|
||||||
for (int c = 0; c < 4; c++) {
|
for (int c = 0; c < 4; c++) {
|
||||||
@@ -631,10 +661,15 @@ vec4_visitor::pack_uniform_registers()
|
|||||||
|
|
||||||
unsigned channel = BRW_GET_SWZ(inst->src[i].swizzle, c) + 1;
|
unsigned channel = BRW_GET_SWZ(inst->src[i].swizzle, c) + 1;
|
||||||
unsigned used = MAX2(chans_used[reg], channel * channel_size);
|
unsigned used = MAX2(chans_used[reg], channel * channel_size);
|
||||||
if (used <= 4)
|
if (used <= 4) {
|
||||||
chans_used[reg] = used;
|
chans_used[reg] = used;
|
||||||
else
|
channel_sizes[reg] = MAX2(channel_sizes[reg], channel_size);
|
||||||
|
} else {
|
||||||
|
is_aligned_to_dvec4[reg] = true;
|
||||||
|
is_aligned_to_dvec4[reg + 1] = true;
|
||||||
chans_used[reg + 1] = used - 4;
|
chans_used[reg + 1] = used - 4;
|
||||||
|
channel_sizes[reg + 1] = MAX2(channel_sizes[reg + 1], channel_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -659,42 +694,60 @@ vec4_visitor::pack_uniform_registers()
|
|||||||
|
|
||||||
int new_uniform_count = 0;
|
int new_uniform_count = 0;
|
||||||
|
|
||||||
|
/* As the uniforms are going to be reordered, take the data from a temporary
|
||||||
|
* copy of the original param[].
|
||||||
|
*/
|
||||||
|
gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
|
||||||
|
stage_prog_data->nr_params);
|
||||||
|
memcpy(param, stage_prog_data->param,
|
||||||
|
sizeof(gl_constant_value*) * stage_prog_data->nr_params);
|
||||||
|
|
||||||
/* Now, figure out a packing of the live uniform vectors into our
|
/* Now, figure out a packing of the live uniform vectors into our
|
||||||
* push constants.
|
* push constants. Start with dvec{3,4} because they are aligned to
|
||||||
|
* dvec4 size (2 vec4).
|
||||||
*/
|
*/
|
||||||
for (int src = 0; src < uniforms; src++) {
|
for (int src = 0; src < uniforms; src++) {
|
||||||
int size = chans_used[src];
|
int size = chans_used[src];
|
||||||
|
|
||||||
if (size == 0)
|
if (size == 0 || !is_aligned_to_dvec4[src])
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
int dst;
|
/* dvec3 are aligned to dvec4 size, apply the alignment of the size
|
||||||
/* Find the lowest place we can slot this uniform in. */
|
* to 4 to avoid moving last component of a dvec3 to the available
|
||||||
for (dst = 0; dst < src; dst++) {
|
* location at the end of a previous dvec3. These available locations
|
||||||
if (chans_used[dst] + size <= 4)
|
* could be filled by smaller variables in next loop.
|
||||||
break;
|
*/
|
||||||
|
size = ALIGN(size, 4);
|
||||||
|
int dst = set_push_constant_loc(uniforms, &new_uniform_count,
|
||||||
|
src, size, channel_sizes[src],
|
||||||
|
new_loc, new_chan,
|
||||||
|
new_chans_used);
|
||||||
|
/* Move the references to the data */
|
||||||
|
for (int j = 0; j < size; j++) {
|
||||||
|
stage_prog_data->param[dst * 4 + new_chan[src] + j] =
|
||||||
|
param[src * 4 + j];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (src == dst) {
|
|
||||||
new_loc[src] = dst;
|
|
||||||
new_chan[src] = 0;
|
|
||||||
} else {
|
|
||||||
new_loc[src] = dst;
|
|
||||||
new_chan[src] = chans_used[dst];
|
|
||||||
|
|
||||||
/* Move the references to the data */
|
|
||||||
for (int j = 0; j < size; j++) {
|
|
||||||
stage_prog_data->param[dst * 4 + new_chan[src] + j] =
|
|
||||||
stage_prog_data->param[src * 4 + j];
|
|
||||||
}
|
|
||||||
|
|
||||||
chans_used[dst] += size;
|
|
||||||
chans_used[src] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
new_uniform_count = MAX2(new_uniform_count, dst + 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Continue with the rest of data, which is aligned to vec4. */
|
||||||
|
for (int src = 0; src < uniforms; src++) {
|
||||||
|
int size = chans_used[src];
|
||||||
|
|
||||||
|
if (size == 0 || is_aligned_to_dvec4[src])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
int dst = set_push_constant_loc(uniforms, &new_uniform_count,
|
||||||
|
src, size, channel_sizes[src],
|
||||||
|
new_loc, new_chan,
|
||||||
|
new_chans_used);
|
||||||
|
/* Move the references to the data */
|
||||||
|
for (int j = 0; j < size; j++) {
|
||||||
|
stage_prog_data->param[dst * 4 + new_chan[src] + j] =
|
||||||
|
param[src * 4 + j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ralloc_free(param);
|
||||||
this->uniforms = new_uniform_count;
|
this->uniforms = new_uniform_count;
|
||||||
|
|
||||||
/* Now, update the instructions for our repacked uniforms. */
|
/* Now, update the instructions for our repacked uniforms. */
|
||||||
@@ -705,9 +758,9 @@ vec4_visitor::pack_uniform_registers()
|
|||||||
if (inst->src[i].file != UNIFORM)
|
if (inst->src[i].file != UNIFORM)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
int chan = new_chan[src] / channel_sizes[src];
|
||||||
inst->src[i].nr = new_loc[src];
|
inst->src[i].nr = new_loc[src];
|
||||||
inst->src[i].swizzle += BRW_SWIZZLE4(new_chan[src], new_chan[src],
|
inst->src[i].swizzle += BRW_SWIZZLE4(chan, chan, chan, chan);
|
||||||
new_chan[src], new_chan[src]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user