nir/i965: use two slots from inputs_read for dvec3/dvec4 vertex input attributes

So far, inputs_read was a bitmap tracking which vertex input locations
were being used.

In OpenGL, an attribute bigger than a vec4 (like a dvec3 or dvec4)
consumes just one location, like any other small attribute. So we mark the
proper bit in inputs_read, and also the same bit in double_inputs_read
if the attribute is a dvec3/dvec4.

But in Vulkan, this is slightly different: a dvec3/dvec4 attribute
consumes two locations, not just one. And hence two bits would be marked
in inputs_read for the same vertex input attribute.

To avoid handling two different situations in NIR, we just choose the
latter one: in OpenGL, when creating NIR from GLSL/IR, any dvec3/dvec4
vertex input attribute is marked with two bits in the inputs_read bitmap
(and also in the double_inputs_read), and following attributes are
adjusted accordingly.

As an example, if in our GLSL/IR shader we have three attributes:

layout(location = 0) vec3  attr0;
layout(location = 1) dvec4 attr1;
layout(location = 2) dvec3 attr2;

then in our NIR shader we put attr0 in location 0, attr1 in locations 1
and 2, and attr2 in locations 3 and 4.

Checking carefully, basically we are using slots rather than locations
in NIR.

When emitting the vertices, we do an inverse map to know the
corresponding location for each slot.

v2 (Jason):
- use two slots from inputs_read for dvec3/dvec4 NIR from GLSL/IR.

v3 (Jason):
- Fix commit log error.
- Use ladder ifs and fix braces.
- elements_double is divisible by 2, don't need DIV_ROUND_UP().
- Use if ladder instead of a switch.
- Add comment about hardware restriction in 64bit vertex attributes.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
Juan A. Suarez Romero
2016-12-16 10:24:43 +01:00
parent 3551a2d3ad
commit c2acf97fcc
9 changed files with 107 additions and 60 deletions

View File

@@ -53,11 +53,6 @@ set_io_mask(nir_shader *shader, nir_variable *var, int offset, int len)
else
shader->info->inputs_read |= bitfield;
/* double inputs read is only for vertex inputs */
if (shader->stage == MESA_SHADER_VERTEX &&
glsl_type_is_dual_slot(glsl_without_array(var->type)))
shader->info->double_inputs_read |= bitfield;
if (shader->stage == MESA_SHADER_FRAGMENT) {
shader->info->fs.uses_sample_qualifier |= var->data.sample;
}
@@ -83,26 +78,21 @@ static void
mark_whole_variable(nir_shader *shader, nir_variable *var)
{
const struct glsl_type *type = var->type;
bool is_vertex_input = false;
if (nir_is_per_vertex_io(var, shader->stage)) {
assert(glsl_type_is_array(type));
type = glsl_get_array_element(type);
}
if (shader->stage == MESA_SHADER_VERTEX &&
var->data.mode == nir_var_shader_in)
is_vertex_input = true;
const unsigned slots =
var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
: glsl_count_attribute_slots(type, is_vertex_input);
: glsl_count_attribute_slots(type, false);
set_io_mask(shader, var, 0, slots);
}
static unsigned
get_io_offset(nir_deref_var *deref, bool is_vertex_input)
get_io_offset(nir_deref_var *deref)
{
unsigned offset = 0;
@@ -117,7 +107,7 @@ get_io_offset(nir_deref_var *deref, bool is_vertex_input)
return -1;
}
offset += glsl_count_attribute_slots(tail->type, is_vertex_input) *
offset += glsl_count_attribute_slots(tail->type, false) *
deref_array->base_offset;
}
/* TODO: we can get the offset for structs here see nir_lower_io() */
@@ -163,12 +153,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var *deref)
return false;
}
bool is_vertex_input = false;
if (shader->stage == MESA_SHADER_VERTEX &&
var->data.mode == nir_var_shader_in)
is_vertex_input = true;
unsigned offset = get_io_offset(deref, is_vertex_input);
unsigned offset = get_io_offset(deref);
if (offset == -1)
return false;
@@ -184,8 +169,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var *deref)
}
/* double element width for double types that takes two slots */
if (!is_vertex_input &&
glsl_type_is_dual_slot(glsl_without_array(type))) {
if (glsl_type_is_dual_slot(glsl_without_array(type))) {
elem_width *= 2;
}
@@ -220,13 +204,27 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader)
case nir_intrinsic_interp_var_at_sample:
case nir_intrinsic_interp_var_at_offset:
case nir_intrinsic_load_var:
case nir_intrinsic_store_var:
if (instr->variables[0]->var->data.mode == nir_var_shader_in ||
instr->variables[0]->var->data.mode == nir_var_shader_out) {
case nir_intrinsic_store_var: {
nir_variable *var = instr->variables[0]->var;
if (var->data.mode == nir_var_shader_in ||
var->data.mode == nir_var_shader_out) {
if (!try_mask_partial_io(shader, instr->variables[0]))
mark_whole_variable(shader, instr->variables[0]->var);
mark_whole_variable(shader, var);
/* We need to track which input_reads bits correspond to a
* dvec3/dvec4 input attribute */
if (shader->stage == MESA_SHADER_VERTEX &&
var->data.mode == nir_var_shader_in &&
glsl_type_is_dual_slot(glsl_without_array(var->type))) {
for (uint i = 0; i < glsl_count_attribute_slots(var->type, false); i++) {
int idx = var->data.location + i;
shader->info->double_inputs_read |= BITFIELD64_BIT(idx);
}
}
}
break;
}
case nir_intrinsic_load_draw_id:
case nir_intrinsic_load_front_face: