glsl/linker: Fix attempts to split up 64bit varyings between slots

When packing varyings when there is only 32bit of space
left in slot 64bit type is attempted to be divided between
current and next slot. However there is neither code for
splitting the 64bit type nor for assembling it back.
Instead we add 32bit padding.

The above happens only in structs because their
members aren't sorted by packing order.

Example of the issue:

struct S {
 vec3 a;
 double d;
};
out flat S s;

Before, the packing went as:

slot 32b  32b  32b  32b
 0   a.x  a.y  a.z   d
 1    d    0    0    0

After:

slot 32b  32b  32b  32b
 0   a.x  a.y  a.z   0
 1    d    d    0    0

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2333>
This commit is contained in:
Danylo Piliaiev
2019-10-10 17:59:35 +03:00
committed by Marge Bot
parent 6652c5018c
commit c432cb672a
4 changed files with 98 additions and 17 deletions

View File

@@ -1717,7 +1717,6 @@ private:
* Packing order for this varying, computed by compute_packing_order().
*/
packing_order_enum packing_order;
unsigned num_components;
/**
* The output variable in the producer stage.
@@ -1894,9 +1893,6 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
*/
const ir_variable *const var = (consumer_var != NULL)
? consumer_var : producer_var;
const gl_shader_stage stage = (consumer_var != NULL)
? consumer_stage : producer_stage;
const glsl_type *type = get_varying_type(var, stage);
if (producer_var && consumer_var &&
consumer_var->data.must_be_shader_input) {
@@ -1907,15 +1903,6 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
= this->compute_packing_class(var);
this->matches[this->num_matches].packing_order
= this->compute_packing_order(var);
if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) ||
(this->disable_xfb_packing && var->data.is_xfb) ||
var->data.must_be_shader_input) {
unsigned slots = type->count_attribute_slots(false);
this->matches[this->num_matches].num_components = slots * 4;
} else {
this->matches[this->num_matches].num_components
= type->component_slots();
}
this->matches[this->num_matches].producer_var = producer_var;
this->matches[this->num_matches].consumer_var = consumer_var;
@@ -2037,9 +2024,19 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
* inputs, we use the number of slots * 4, as they have different
* counting rules.
*/
unsigned num_components = is_vertex_input ?
type->count_attribute_slots(is_vertex_input) * 4 :
this->matches[i].num_components;
unsigned num_components = 0;
if (is_vertex_input) {
num_components = type->count_attribute_slots(is_vertex_input) * 4;
} else {
if ((this->disable_varying_packing &&
!is_varying_packing_safe(type, var)) ||
(this->disable_xfb_packing && var->data.is_xfb) ||
var->data.must_be_shader_input) {
num_components = type->count_attribute_slots(false) * 4;
} else {
num_components = type->component_slots_aligned(*location);
}
}
/* The last slot for this variable, inclusive. */
unsigned slot_end = *location + num_components - 1;

View File

@@ -554,6 +554,16 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
false, vertex_index);
} else if (rvalue->type->vector_elements * dmul +
fine_location % 4 > 4) {
/* We don't have code to split up 64bit variable between two
* varying slots, instead we add padding if necessary.
*/
unsigned aligned_fine_location = ALIGN_POT(fine_location, dmul);
if (aligned_fine_location != fine_location) {
return this->lower_rvalue(rvalue, aligned_fine_location,
unpacked_var, name, false,
vertex_index);
}
/* This vector is going to be "double parked" across two varying slots,
* so handle it as two separate assignments. For doubles, a dvec3/dvec4
* can end up being spread over 3 slots. However the second splitting
@@ -567,8 +577,8 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
left_components = 4 - fine_location % 4;
if (rvalue->type->is_64bit()) {
/* We might actually end up with 0 left components! */
left_components /= 2;
assert(left_components > 0);
}
right_components = rvalue->type->vector_elements - left_components;
@@ -609,6 +619,7 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
unsigned location_frac = fine_location % 4;
for (unsigned i = 0; i < components; ++i)
swizzle_values[i] = i + location_frac;
assert(this->components[location - VARYING_SLOT_VAR0] >= components);
ir_dereference *packed_deref =
this->get_packed_varying_deref(location, unpacked_var, name,
vertex_index);
@@ -655,6 +666,11 @@ lower_packed_varyings_visitor::lower_arraylike(ir_rvalue *rvalue,
bool gs_input_toplevel,
unsigned vertex_index)
{
unsigned dmul = rvalue->type->without_array()->is_64bit() ? 2 : 1;
if (array_size * dmul + fine_location % 4 > 4) {
fine_location = ALIGN_POT(fine_location, dmul);
}
for (unsigned i = 0; i < array_size; i++) {
if (i != 0)
rvalue = rvalue->clone(this->mem_ctx, NULL);