third_party_mesa3d/src/broadcom/compiler/v3d_nir_lower_io.c
Iago Toral Quiroga 5d578c27ce v3d: add initial compiler plumbing for geometry shaders
Most of the relevant work happens in v3d_nir_lower_io. Since
geometry shaders can write any number of output vertices, this pass
injects a few variables into the shader code to keep track of things
like the number of vertices emitted and the VPM offsets of the
current vertex output. This is also where we handle the
EmitVertex() and EndPrimitive() intrinsics.

The geometry shader VPM output layout has a specific structure
with a 32-bit general header, then another 32-bit header slot for
each output vertex, and finally the actual vertex data.
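For example, a geometry shader with 3 output vertices uses VPM
slot 0 for the general header, slots 1-3 for the per-vertex
headers, and writes each emitted vertex's data after that.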

When vertex shaders are paired with geometry shaders we also need
to consider the following:
  - Only geometry shaders emit fixed function outputs.
  - The coordinate shader used for the vertex stage during binning must
    not drop varyings other than those used by transform feedback, since
    these may be read by the binning GS.

v2:
 - Use MAX3 instead of a chain of MAX2 (Alejandro).
 - Make all loop variables unsigned in ntq_setup_gs_inputs (Alejandro)
 - Update comment in IO lowering so it includes the GS stage (Alejandro)

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
2019-12-16 08:42:37 +01:00

/*
* Copyright © 2015 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "compiler/v3d_compiler.h"
#include "compiler/nir/nir_builder.h"
/**
* Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
* intrinsics into something amenable to the V3D architecture.
*
* Most of the work is turning the VS's store_output intrinsics from working
* on a base representing the gallium-level vec4 driver_location to an offset
* within the VPM, and emitting the header that's read by the fixed function
* hardware between the VS and FS.
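*
* For geometry shaders, emit_vertex and end_primitive are lowered here as
* well, using a few shader temporaries to track the VPM offset and header
* of the vertex currently being emitted (see emit_gs_prolog()).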
*
* We also adjust the offsets on uniform loads to be in bytes, since that's
* what we need for indirect addressing with general TMU access.
*/
struct v3d_nir_lower_io_state {
int pos_vpm_offset;
int vp_vpm_offset;
int zs_vpm_offset;
int rcp_wc_vpm_offset;
int psiz_vpm_offset;
int varyings_vpm_offset;
/* Geometry shader state */
struct {
/* VPM offset for the current vertex data output */
nir_variable *output_offset_var;
/* VPM offset for the current vertex header */
nir_variable *header_offset_var;
/* VPM header for the current vertex */
nir_variable *header_var;
/* Size of the complete VPM output header */
uint32_t output_header_size;
/* Size of the output data for a single vertex */
uint32_t output_vertex_data_size;
} gs;
BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];
nir_ssa_def *pos[4];
};
static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
struct v3d_nir_lower_io_state *state);
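/* Emits a scalar store_output to the given VPM slot (base), optionally
* adding a dynamic offset (used by geometry shaders to address the current
* vertex's slots).
*/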
static void
v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
nir_ssa_def *chan)
{
nir_intrinsic_instr *intr =
nir_intrinsic_instr_create(b->shader,
nir_intrinsic_store_output);
nir_ssa_dest_init(&intr->instr, &intr->dest,
1, intr->dest.ssa.bit_size, NULL);
intr->num_components = 1;
intr->src[0] = nir_src_for_ssa(chan);
if (offset)
intr->src[1] = nir_src_for_ssa(offset);
else
intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_intrinsic_set_base(intr, base);
nir_intrinsic_set_write_mask(intr, 0x1);
nir_intrinsic_set_component(intr, 0);
nir_builder_instr_insert(b, &intr->instr);
}
/* Convert the uniform offset to bytes. If it happens to be a constant,
* constant-folding will clean up the shift for us.
*/
static void
v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
b->cursor = nir_before_instr(&intr->instr);
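/* Both the base and the indirect offset come in vec4 (16-byte) units, so
* scale them by 16 to get the byte units used for general TMU access.
*/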
nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 16);
nir_instr_rewrite_src(&intr->instr,
&intr->src[0],
nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
nir_imm_int(b, 4))));
}
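/* Returns the index of the given varying slot/component in the list of
* outputs consumed by the next stage (from the shader key), or -1 if the
* next stage does not read it.
*/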
static int
v3d_varying_slot_vpm_offset(struct v3d_compile *c, nir_variable *var, int chan)
{
int component = var->data.location_frac + chan;
uint32_t num_used_outputs = 0;
struct v3d_varying_slot *used_outputs = NULL;
switch (c->s->info.stage) {
case MESA_SHADER_VERTEX:
num_used_outputs = c->vs_key->num_used_outputs;
used_outputs = c->vs_key->used_outputs;
break;
case MESA_SHADER_GEOMETRY:
num_used_outputs = c->gs_key->num_used_outputs;
used_outputs = c->gs_key->used_outputs;
break;
default:
unreachable("Unsupported shader stage");
}
for (int i = 0; i < num_used_outputs; i++) {
struct v3d_varying_slot slot = used_outputs[i];
if (v3d_slot_get_slot(slot) == var->data.location &&
v3d_slot_get_component(slot) == component) {
return i;
}
}
return -1;
}
/* Lowers a store_output(gallium driver location) to a series of store_outputs
* with a driver_location equal to the offset in the VPM.
*
* For geometry shaders we need to emit multiple vertices so the VPM offsets
* need to be computed in the shader code based on the current vertex index.
*/
static void
v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_instr *intr,
struct v3d_nir_lower_io_state *state)
{
b->cursor = nir_before_instr(&intr->instr);
/* If this is a geometry shader we need to emit our outputs
* to the current vertex offset in the VPM.
*/
nir_ssa_def *offset_reg =
c->s->info.stage == MESA_SHADER_GEOMETRY ?
nir_load_var(b, state->gs.output_offset_var) : NULL;
int start_comp = nir_intrinsic_component(intr);
nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
intr->num_components);
nir_variable *var = NULL;
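/* Find the output variable this store_output writes: it must match the
* intrinsic's base driver_location and cover the components being written.
*/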
nir_foreach_variable(scan_var, &c->s->outputs) {
if (scan_var->data.driver_location != nir_intrinsic_base(intr) ||
start_comp < scan_var->data.location_frac ||
start_comp >= scan_var->data.location_frac +
glsl_get_components(scan_var->type)) {
continue;
}
var = scan_var;
}
assert(var);
/* Save off the components of the position for the setup of VPM inputs
* read by fixed function HW.
*/
if (var->data.location == VARYING_SLOT_POS) {
for (int i = 0; i < intr->num_components; i++) {
state->pos[start_comp + i] = nir_channel(b, src, i);
}
}
/* Just write psiz to its position in the FF header for now. */
if (var->data.location == VARYING_SLOT_PSIZ &&
state->psiz_vpm_offset != -1) {
v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
}
/* Scalarize outputs if it hasn't happened already, since we want to
* schedule each VPM write individually. We can skip any output
* components not read by the FS.
*/
for (int i = 0; i < intr->num_components; i++) {
int vpm_offset =
v3d_varying_slot_vpm_offset(c, var,
i +
start_comp -
var->data.location_frac);
if (vpm_offset == -1)
continue;
BITSET_SET(state->varyings_stored, vpm_offset);
v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
offset_reg, nir_channel(b, src, i));
}
nir_instr_remove(&intr->instr);
}
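/* Resets the current vertex header to its initial state: the new primitive
* bit set (so the next emitted vertex starts a new primitive) and the size
* of the per-vertex data, in VPM slots, in the length field.
*/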
static inline void
reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
{
const uint8_t NEW_PRIMITIVE_OFFSET = 0;
const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;
uint32_t vertex_data_size = state->gs.output_vertex_data_size;
assert((vertex_data_size & 0xffffff00) == 0);
uint32_t header;
header = 1 << NEW_PRIMITIVE_OFFSET;
header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
}
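/* Lowers emit_vertex: writes the fixed function outputs and the header for
* the vertex just emitted, then advances the output and header offsets and
* clears the new primitive bit for the next vertex.
*/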
static void
v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_instr *instr,
struct v3d_nir_lower_io_state *state)
{
b->cursor = nir_before_instr(&instr->instr);
nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
nir_ssa_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
nir_ssa_def *output_offset = nir_load_var(b, state->gs.output_offset_var);
/* Emit fixed function outputs */
v3d_nir_emit_ff_vpm_outputs(c, b, state);
/* Emit vertex header */
v3d_nir_store_output(b, 0, header_offset, header);
/* Update VPM offset for next vertex output data and header */
output_offset =
nir_iadd(b, output_offset,
nir_imm_int(b, state->gs.output_vertex_data_size));
header_offset = nir_iadd(b, header_offset, nir_imm_int(b, 1));
/* Reset the New Primitive bit */
header = nir_iand(b, header, nir_imm_int(b, 0xfffffffe));
nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
nir_store_var(b, state->gs.header_var, header, 0x1);
nir_instr_remove(&instr->instr);
}
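/* Lowers end_primitive: resetting the vertex header sets the new primitive
* bit again, so the next emitted vertex starts a new primitive.
*/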
static void
v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_instr *instr,
struct v3d_nir_lower_io_state *state)
{
assert(state->gs.header_var);
b->cursor = nir_before_instr(&instr->instr);
reset_gs_header(b, state);
nir_instr_remove(&instr->instr);
}
static void
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
struct nir_instr *instr,
struct v3d_nir_lower_io_state *state)
{
if (instr->type != nir_instr_type_intrinsic)
return;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_uniform:
v3d_nir_lower_uniform(c, b, intr);
break;
case nir_intrinsic_store_output:
if (c->s->info.stage == MESA_SHADER_VERTEX ||
c->s->info.stage == MESA_SHADER_GEOMETRY) {
v3d_nir_lower_vpm_output(c, b, intr, state);
}
break;
case nir_intrinsic_emit_vertex:
v3d_nir_lower_emit_vertex(c, b, intr, state);
break;
case nir_intrinsic_end_primitive:
v3d_nir_lower_end_primitive(c, b, intr, state);
break;
default:
break;
}
}
/* Remap the output var's .driver_location. This is purely for
* nir_print_shader() so that store_output can map back to a variable name.
*/
static void
v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
struct v3d_nir_lower_io_state *state)
{
nir_foreach_variable_safe(var, &c->s->outputs) {
if (var->data.location == VARYING_SLOT_POS &&
state->pos_vpm_offset != -1) {
var->data.driver_location = state->pos_vpm_offset;
continue;
}
if (var->data.location == VARYING_SLOT_PSIZ &&
state->psiz_vpm_offset != -1) {
var->data.driver_location = state->psiz_vpm_offset;
continue;
}
int vpm_offset = v3d_varying_slot_vpm_offset(c, var, 0);
if (vpm_offset != -1) {
var->data.driver_location =
state->varyings_vpm_offset + vpm_offset;
} else {
/* If we couldn't find a mapping for the var, delete
* it so that its old .driver_location doesn't confuse
* nir_print_shader().
*/
exec_node_remove(&var->node);
}
}
}
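/* VPM output layout for vertex/coordinate shaders: when this is the last
* geometry stage, the fixed function outputs read by the hardware come
* first (clip position for coordinate shaders, viewport X/Y, then Zs and
* 1/Wc for vertex shaders, plus an optional point size), followed by one
* slot per varying component consumed by later stages.
*/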
static void
v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
struct v3d_nir_lower_io_state *state)
{
uint32_t vpm_offset = 0;
state->pos_vpm_offset = -1;
state->vp_vpm_offset = -1;
state->zs_vpm_offset = -1;
state->rcp_wc_vpm_offset = -1;
state->psiz_vpm_offset = -1;
bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
if (needs_ff_outputs) {
if (c->vs_key->is_coord) {
state->pos_vpm_offset = vpm_offset;
vpm_offset += 4;
}
state->vp_vpm_offset = vpm_offset;
vpm_offset += 2;
if (!c->vs_key->is_coord) {
state->zs_vpm_offset = vpm_offset++;
state->rcp_wc_vpm_offset = vpm_offset++;
}
if (c->vs_key->per_vertex_point_size)
state->psiz_vpm_offset = vpm_offset++;
}
state->varyings_vpm_offset = vpm_offset;
c->vpm_output_size = vpm_offset + c->vs_key->num_used_outputs;
}
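/* GS output VPM layout: slot 0 holds the general header (vertex count and
* header size), followed by one header slot per output vertex, followed by
* the per-vertex data (fixed function outputs first, then the varyings
* consumed by later stages).
*
* As an example, with vertices_out = 3 and 6 slots of data per vertex the
* total VPM output size would be (1 + 3) + 3 * 6 = 22 slots.
*/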
static void
v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
struct v3d_nir_lower_io_state *state)
{
/* 1 header slot for number of output vertices */
uint32_t vpm_offset = 1;
/* 1 header slot per output vertex */
const uint32_t num_vertices = c->s->info.gs.vertices_out;
vpm_offset += num_vertices;
state->gs.output_header_size = vpm_offset;
/* Vertex data: here we only compute offsets into the data for a generic
* vertex. When it is time to actually write a particular vertex to the
* VPM, we will add that vertex's base offset within the VPM output to
* these offsets.
*
* If geometry shaders are present, they are always the last shader
* stage before rasterization, so we always emit fixed function outputs.
*/
vpm_offset = 0;
if (c->gs_key->is_coord) {
state->pos_vpm_offset = vpm_offset;
vpm_offset += 4;
} else {
state->pos_vpm_offset = -1;
}
state->vp_vpm_offset = vpm_offset;
vpm_offset += 2;
if (!c->gs_key->is_coord) {
state->zs_vpm_offset = vpm_offset++;
state->rcp_wc_vpm_offset = vpm_offset++;
} else {
state->zs_vpm_offset = -1;
state->rcp_wc_vpm_offset = -1;
}
/* Mesa enables OES_geometry_shader_point_size automatically with
* OES_geometry_shader so we always need to handle point size
* writes if present.
*/
if (c->gs_key->per_vertex_point_size)
state->psiz_vpm_offset = vpm_offset++;
state->varyings_vpm_offset = vpm_offset;
state->gs.output_vertex_data_size =
state->varyings_vpm_offset + c->gs_key->num_used_outputs;
c->vpm_output_size =
state->gs.output_header_size +
state->gs.output_vertex_data_size * num_vertices;
}
static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
struct v3d_nir_lower_io_state *state)
{
/* If this is a geometry shader we need to emit our fixed function
* outputs to the current vertex offset in the VPM.
*/
nir_ssa_def *offset_reg =
c->s->info.stage == MESA_SHADER_GEOMETRY ?
nir_load_var(b, state->gs.output_offset_var) : NULL;
for (int i = 0; i < 4; i++) {
if (!state->pos[i])
state->pos[i] = nir_ssa_undef(b, 1, 32);
}
nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]);
if (state->pos_vpm_offset != -1) {
for (int i = 0; i < 4; i++) {
v3d_nir_store_output(b, state->pos_vpm_offset + i,
offset_reg, state->pos[i]);
}
}
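/* Viewport X/Y: clip-space x/y times the viewport scale uniforms, divided
* by Wc and rounded to the nearest integer.
*/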
if (state->vp_vpm_offset != -1) {
for (int i = 0; i < 2; i++) {
nir_ssa_def *pos;
nir_ssa_def *scale;
pos = state->pos[i];
if (i == 0)
scale = nir_load_viewport_x_scale(b);
else
scale = nir_load_viewport_y_scale(b);
pos = nir_fmul(b, pos, scale);
pos = nir_fmul(b, pos, rcp_wc);
pos = nir_f2i32(b, nir_fround_even(b, pos));
v3d_nir_store_output(b, state->vp_vpm_offset + i,
offset_reg, pos);
}
}
if (state->zs_vpm_offset != -1) {
nir_ssa_def *z = state->pos[2];
z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
z = nir_fmul(b, z, rcp_wc);
z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
}
if (state->rcp_wc_vpm_offset != -1) {
v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
offset_reg, rcp_wc);
}
/* Store 0 to varyings requested by the FS but not stored by the
* previous stage. This should be undefined behavior, but
* glsl-routing seems to rely on it.
*/
uint32_t num_used_outputs;
switch (c->s->info.stage) {
case MESA_SHADER_VERTEX:
num_used_outputs = c->vs_key->num_used_outputs;
break;
case MESA_SHADER_GEOMETRY:
num_used_outputs = c->gs_key->num_used_outputs;
break;
default:
unreachable("Unsupported shader stage");
}
for (int i = 0; i < num_used_outputs; i++) {
if (!BITSET_TEST(state->varyings_stored, i)) {
v3d_nir_store_output(b, state->varyings_vpm_offset + i,
offset_reg, nir_imm_int(b, 0));
}
}
}
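/* Initializes the GS bookkeeping variables at the top of the shader:
* output_offset points at the first data slot right after the output
* header block, header_offset points at slot 1 (slot 0 is the general
* header), and the current vertex header starts in its reset state.
*/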
static void
emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
nir_function_impl *impl,
struct v3d_nir_lower_io_state *state)
{
nir_block *first = nir_start_block(impl);
b->cursor = nir_before_block(first);
const struct glsl_type *uint_type = glsl_uint_type();
assert(!state->gs.output_offset_var);
state->gs.output_offset_var =
nir_local_variable_create(impl, uint_type, "output_offset");
nir_store_var(b, state->gs.output_offset_var,
nir_imm_int(b, state->gs.output_header_size), 0x1);
assert(!state->gs.header_offset_var);
state->gs.header_offset_var =
nir_local_variable_create(impl, uint_type, "header_offset");
nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);
assert(!state->gs.header_var);
state->gs.header_var =
nir_local_variable_create(impl, uint_type, "header");
reset_gs_header(b, state);
}
static void
emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
struct v3d_nir_lower_io_state *state)
{
const uint8_t VERTEX_COUNT_OFFSET = 16;
/* Our GS header has 1 generic header slot (at VPM offset 0) and then
* one slot per output vertex after it. This means we don't need to
* have a variable just to keep track of the number of vertices we
* emitted and instead we can just compute it here from the header
* offset variable by subtracting the one generic header slot that always
* goes at the beginning of our header.
*/
nir_ssa_def *header_offset =
nir_load_var(b, state->gs.header_offset_var);
nir_ssa_def *vertex_count =
nir_isub(b, header_offset, nir_imm_int(b, 1));
nir_ssa_def *header =
nir_ior(b, nir_imm_int(b, state->gs.output_header_size),
nir_ishl(b, vertex_count,
nir_imm_int(b, VERTEX_COUNT_OFFSET)));
v3d_nir_store_output(b, 0, NULL, header);
}
void
v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
{
struct v3d_nir_lower_io_state state = { 0 };
/* Set up the layout of the VPM outputs. */
switch (s->info.stage) {
case MESA_SHADER_VERTEX:
v3d_nir_setup_vpm_layout_vs(c, &state);
break;
case MESA_SHADER_GEOMETRY:
v3d_nir_setup_vpm_layout_gs(c, &state);
break;
case MESA_SHADER_FRAGMENT:
case MESA_SHADER_COMPUTE:
break;
default:
unreachable("Unsupported shader stage");
}
nir_foreach_function(function, s) {
if (function->impl) {
nir_builder b;
nir_builder_init(&b, function->impl);
if (c->s->info.stage == MESA_SHADER_GEOMETRY)
emit_gs_prolog(c, &b, function->impl, &state);
nir_foreach_block(block, function->impl) {
nir_foreach_instr_safe(instr, block)
v3d_nir_lower_io_instr(c, &b, instr,
&state);
}
nir_block *last = nir_impl_last_block(function->impl);
b.cursor = nir_after_block(last);
if (s->info.stage == MESA_SHADER_VERTEX) {
v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
} else if (s->info.stage == MESA_SHADER_GEOMETRY) {
emit_gs_vpm_output_header_prolog(c, &b, &state);
}
nir_metadata_preserve(function->impl,
nir_metadata_block_index |
nir_metadata_dominance);
}
}
if (s->info.stage == MESA_SHADER_VERTEX ||
s->info.stage == MESA_SHADER_GEOMETRY) {
v3d_nir_lower_io_update_output_var_base(c, &state);
}
}