vc4: Convert to using nir_lower_io_to_scalar for FS inputs.
The scalarizing of FS inputs can be done in a non-driver-dependent manner, so extract it out of the driver.
This commit is contained in:
@@ -29,9 +29,10 @@
|
||||
* Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
|
||||
* something amenable to the VC4 architecture.
|
||||
*
|
||||
* Currently, it split inputs, outputs, and uniforms into scalars, drops any
|
||||
* non-position outputs in coordinate shaders, and fixes up the addressing on
|
||||
* indirect uniform loads.
|
||||
* Currently, it splits outputs, VS inputs, and uniforms into scalars, drops
|
||||
* any non-position outputs in coordinate shaders, and fixes up the addressing
|
||||
* on indirect uniform loads. FS input scalarization is handled by
|
||||
* nir_lower_io_to_scalar().
|
||||
*/
|
||||
|
||||
static void
|
||||
@@ -228,11 +229,22 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
|
||||
replace_intrinsic_with_vec4(b, intr, dests);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_point_sprite(struct vc4_compile *c, nir_variable *var)
|
||||
{
|
||||
if (var->data.location < VARYING_SLOT_VAR0 ||
|
||||
var->data.location > VARYING_SLOT_VAR31)
|
||||
return false;
|
||||
|
||||
return (c->fs_key->point_sprite_mask &
|
||||
(1 << (var->data.location - VARYING_SLOT_VAR0)));
|
||||
}
|
||||
|
||||
static void
|
||||
vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
|
||||
nir_intrinsic_instr *intr)
|
||||
{
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
|
||||
if (nir_intrinsic_base(intr) >= VC4_NIR_TLB_COLOR_READ_INPUT &&
|
||||
nir_intrinsic_base(intr) < (VC4_NIR_TLB_COLOR_READ_INPUT +
|
||||
@@ -250,50 +262,42 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
|
||||
}
|
||||
assert(input_var);
|
||||
|
||||
/* All TGSI-to-NIR inputs are vec4. */
|
||||
assert(intr->num_components == 4);
|
||||
int comp = nir_intrinsic_component(intr);
|
||||
|
||||
/* We only accept direct inputs and TGSI only ever gives them to us
|
||||
* with an offset value of 0.
|
||||
*/
|
||||
assert(nir_src_as_const_value(intr->src[0]) &&
|
||||
nir_src_as_const_value(intr->src[0])->u32[0] == 0);
|
||||
/* Lower away point coordinates, and fix up PNTC. */
|
||||
if (is_point_sprite(c, input_var) ||
|
||||
input_var->data.location == VARYING_SLOT_PNTC) {
|
||||
assert(intr->num_components == 1);
|
||||
|
||||
/* Generate scalar loads equivalent to the original VEC4. */
|
||||
nir_ssa_def *dests[4];
|
||||
for (unsigned i = 0; i < intr->num_components; i++) {
|
||||
nir_intrinsic_instr *intr_comp =
|
||||
nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input);
|
||||
intr_comp->num_components = 1;
|
||||
nir_intrinsic_set_base(intr_comp,
|
||||
nir_intrinsic_base(intr) * 4 + i);
|
||||
intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
|
||||
nir_ssa_def *result = &intr->dest.ssa;
|
||||
|
||||
nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
|
||||
nir_builder_instr_insert(b, &intr_comp->instr);
|
||||
switch (comp) {
|
||||
case 0:
|
||||
case 1:
|
||||
/* If we're not rendering points, we need to set a
|
||||
* defined value for the input that would come from
|
||||
* PNTC.
|
||||
*/
|
||||
if (!c->fs_key->is_points)
|
||||
result = nir_imm_float(b, 0.0);
|
||||
break;
|
||||
case 2:
|
||||
result = nir_imm_float(b, 0.0);
|
||||
break;
|
||||
case 3:
|
||||
result = nir_imm_float(b, 1.0);
|
||||
break;
|
||||
}
|
||||
|
||||
dests[i] = &intr_comp->dest.ssa;
|
||||
}
|
||||
if (c->fs_key->point_coord_upper_left && comp == 1)
|
||||
result = nir_fsub(b, nir_imm_float(b, 1.0), result);
|
||||
|
||||
if (input_var->data.location >= VARYING_SLOT_VAR0) {
|
||||
if (c->fs_key->point_sprite_mask &
|
||||
(1 << (input_var->data.location -
|
||||
VARYING_SLOT_VAR0))) {
|
||||
if (!c->fs_key->is_points) {
|
||||
dests[0] = nir_imm_float(b, 0.0);
|
||||
dests[1] = nir_imm_float(b, 0.0);
|
||||
}
|
||||
if (c->fs_key->point_coord_upper_left) {
|
||||
dests[1] = nir_fsub(b,
|
||||
nir_imm_float(b, 1.0),
|
||||
dests[1]);
|
||||
}
|
||||
dests[2] = nir_imm_float(b, 0.0);
|
||||
dests[3] = nir_imm_float(b, 1.0);
|
||||
if (result != &intr->dest.ssa) {
|
||||
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
|
||||
nir_src_for_ssa(result),
|
||||
result->parent_instr);
|
||||
}
|
||||
}
|
||||
|
||||
replace_intrinsic_with_vec4(b, intr, dests);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -1611,7 +1611,8 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
|
||||
assert(instr->num_components == 1);
|
||||
const_offset = nir_src_as_const_value(instr->src[0]);
|
||||
assert(const_offset && "vc4 doesn't support indirect inputs");
|
||||
if (nir_intrinsic_base(instr) >= VC4_NIR_TLB_COLOR_READ_INPUT) {
|
||||
if (c->stage == QSTAGE_FRAG &&
|
||||
nir_intrinsic_base(instr) >= VC4_NIR_TLB_COLOR_READ_INPUT) {
|
||||
assert(const_offset->u32[0] == 0);
|
||||
/* Reads of the per-sample color need to be done in
|
||||
* order.
|
||||
@@ -1626,6 +1627,11 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
|
||||
}
|
||||
ntq_store_dest(c, &instr->dest, 0,
|
||||
c->color_reads[sample_index]);
|
||||
} else if (c->stage == QSTAGE_FRAG) {
|
||||
offset = nir_intrinsic_base(instr) + const_offset->u32[0];
|
||||
int comp = nir_intrinsic_component(instr);
|
||||
ntq_store_dest(c, &instr->dest, 0,
|
||||
c->inputs[offset * 4 + comp]);
|
||||
} else {
|
||||
offset = nir_intrinsic_base(instr) + const_offset->u32[0];
|
||||
ntq_store_dest(c, &instr->dest, 0,
|
||||
@@ -2061,10 +2067,17 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
|
||||
if (c->vs_key && c->vs_key->clamp_color)
|
||||
NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
|
||||
|
||||
if (stage == QSTAGE_FRAG)
|
||||
if (stage == QSTAGE_FRAG) {
|
||||
NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);
|
||||
else
|
||||
} else {
|
||||
NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables);
|
||||
}
|
||||
|
||||
/* FS input scalarizing must happen after nir_lower_two_sided_color,
|
||||
* which only handles a vec4 at a time.
|
||||
*/
|
||||
if (c->stage == QSTAGE_FRAG)
|
||||
NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);
|
||||
|
||||
NIR_PASS_V(c->s, vc4_nir_lower_io, c);
|
||||
NIR_PASS_V(c->s, vc4_nir_lower_txf_ms, c);
|
||||
@@ -2168,6 +2181,7 @@ vc4_shader_state_create(struct pipe_context *pctx,
|
||||
NIR_PASS_V(s, nir_opt_global_to_local);
|
||||
NIR_PASS_V(s, nir_convert_to_ssa);
|
||||
NIR_PASS_V(s, nir_normalize_cubemap_coords);
|
||||
|
||||
NIR_PASS_V(s, nir_lower_load_const_to_scalar);
|
||||
|
||||
vc4_optimize_nir(s);
|
||||
|
Reference in New Issue
Block a user