glsl/linker: Fix attempts to split up 64bit varyings between slots

When packing varyings when there is only 32bit of space
left in slot 64bit type is attempted to be divided between
current and next slot. However there is neither code for
splitting the 64bit type nor for assembling it back.
Instead we add 32bit padding.

The above happens only in structs because their
members aren't sorted by packing order.

Example of the issue:

struct S {
 vec3 a;
 double d;
};
out flat S s;

Before, the packing went as:

slot 32b  32b  32b  32b
 0   a.x  a.y  a.z   d
 1    d    0    0    0

After:

slot 32b  32b  32b  32b
 0   a.x  a.y  a.z   0
 1    d    d    0    0

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2333>
This commit is contained in:
Danylo Piliaiev
2019-10-10 17:59:35 +03:00
committed by Marge Bot
parent 6652c5018c
commit c432cb672a
4 changed files with 98 additions and 17 deletions

View File

@@ -1717,7 +1717,6 @@ private:
* Packing order for this varying, computed by compute_packing_order().
*/
packing_order_enum packing_order;
unsigned num_components;
/**
* The output variable in the producer stage.
@@ -1894,9 +1893,6 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
*/
const ir_variable *const var = (consumer_var != NULL)
? consumer_var : producer_var;
const gl_shader_stage stage = (consumer_var != NULL)
? consumer_stage : producer_stage;
const glsl_type *type = get_varying_type(var, stage);
if (producer_var && consumer_var &&
consumer_var->data.must_be_shader_input) {
@@ -1907,15 +1903,6 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
= this->compute_packing_class(var);
this->matches[this->num_matches].packing_order
= this->compute_packing_order(var);
if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) ||
(this->disable_xfb_packing && var->data.is_xfb) ||
var->data.must_be_shader_input) {
unsigned slots = type->count_attribute_slots(false);
this->matches[this->num_matches].num_components = slots * 4;
} else {
this->matches[this->num_matches].num_components
= type->component_slots();
}
this->matches[this->num_matches].producer_var = producer_var;
this->matches[this->num_matches].consumer_var = consumer_var;
@@ -2037,9 +2024,19 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
* inputs, we use the number of slots * 4, as they have different
* counting rules.
*/
unsigned num_components = is_vertex_input ?
type->count_attribute_slots(is_vertex_input) * 4 :
this->matches[i].num_components;
unsigned num_components = 0;
if (is_vertex_input) {
num_components = type->count_attribute_slots(is_vertex_input) * 4;
} else {
if ((this->disable_varying_packing &&
!is_varying_packing_safe(type, var)) ||
(this->disable_xfb_packing && var->data.is_xfb) ||
var->data.must_be_shader_input) {
num_components = type->count_attribute_slots(false) * 4;
} else {
num_components = type->component_slots_aligned(*location);
}
}
/* The last slot for this variable, inclusive. */
unsigned slot_end = *location + num_components - 1;

View File

@@ -554,6 +554,16 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
false, vertex_index);
} else if (rvalue->type->vector_elements * dmul +
fine_location % 4 > 4) {
/* We don't have code to split up 64bit variable between two
* varying slots, instead we add padding if necessary.
*/
unsigned aligned_fine_location = ALIGN_POT(fine_location, dmul);
if (aligned_fine_location != fine_location) {
return this->lower_rvalue(rvalue, aligned_fine_location,
unpacked_var, name, false,
vertex_index);
}
/* This vector is going to be "double parked" across two varying slots,
* so handle it as two separate assignments. For doubles, a dvec3/dvec4
* can end up being spread over 3 slots. However the second splitting
@@ -567,8 +577,8 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
left_components = 4 - fine_location % 4;
if (rvalue->type->is_64bit()) {
/* We might actually end up with 0 left components! */
left_components /= 2;
assert(left_components > 0);
}
right_components = rvalue->type->vector_elements - left_components;
@@ -609,6 +619,7 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
unsigned location_frac = fine_location % 4;
for (unsigned i = 0; i < components; ++i)
swizzle_values[i] = i + location_frac;
assert(this->components[location - VARYING_SLOT_VAR0] >= components);
ir_dereference *packed_deref =
this->get_packed_varying_deref(location, unpacked_var, name,
vertex_index);
@@ -655,6 +666,11 @@ lower_packed_varyings_visitor::lower_arraylike(ir_rvalue *rvalue,
bool gs_input_toplevel,
unsigned vertex_index)
{
unsigned dmul = rvalue->type->without_array()->is_64bit() ? 2 : 1;
if (array_size * dmul + fine_location % 4 > 4) {
fine_location = ALIGN_POT(fine_location, dmul);
}
for (unsigned i = 0; i < array_size; i++) {
if (i != 0)
rvalue = rvalue->clone(this->mem_ctx, NULL);

View File

@@ -1643,6 +1643,72 @@ glsl_type::component_slots() const
return 0;
}
unsigned
glsl_type::component_slots_aligned(unsigned offset) const
{
/* Align 64bit type only if it crosses attribute slot boundary. */
switch (this->base_type) {
case GLSL_TYPE_UINT:
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT8:
case GLSL_TYPE_INT8:
case GLSL_TYPE_UINT16:
case GLSL_TYPE_INT16:
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_FLOAT16:
case GLSL_TYPE_BOOL:
return this->components();
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64: {
unsigned size = 2 * this->components();
if (offset % 2 == 1 && (offset % 4 + size) > 4) {
size++;
}
return size;
}
case GLSL_TYPE_STRUCT:
case GLSL_TYPE_INTERFACE: {
unsigned size = 0;
for (unsigned i = 0; i < this->length; i++) {
const glsl_type *member = this->fields.structure[i].type;
size += member->component_slots_aligned(size + offset);
}
return size;
}
case GLSL_TYPE_ARRAY: {
unsigned size = 0;
for (unsigned i = 0; i < this->length; i++) {
size += this->fields.array->component_slots_aligned(size + offset);
}
return size;
}
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
return 2 + ((offset % 4) == 3 ? 1 : 0);
case GLSL_TYPE_SUBROUTINE:
return 1;
case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
break;
}
return 0;
}
unsigned
glsl_type::struct_location_offset(unsigned length) const
{

View File

@@ -521,6 +521,8 @@ public:
*/
unsigned component_slots() const;
unsigned component_slots_aligned(unsigned offset) const;
/**
* Calculate offset between the base location of the struct in
* uniform storage and a struct member.