freedreno/ir3: rework setup_{input,output} to make struct varyings work

Rework setup_{input,output} to be called during emit_intrinsic, in a way
which allows struct/array/matrix type varyings to work.

This allows turnip to pass dEQP-VK.glsl.linkage.varying.struct.*

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6181>
This commit is contained in:
Jonathan Marek
2020-08-12 21:59:33 -04:00
committed by Marge Bot
parent c694af40bf
commit a6291b1b11
9 changed files with 110 additions and 169 deletions

View File

@@ -3,7 +3,6 @@ dEQP-GLES2.functional.clipping.triangle_vertex.clip_three.clip_neg_x_neg_z_and_p
dEQP-VK.binding_model.descriptorset_random.sets4.constant.ubolimitlow.sbolimithigh.imglimithigh.noiub.uab.frag.ialimitlow.0 dEQP-VK.binding_model.descriptorset_random.sets4.constant.ubolimitlow.sbolimithigh.imglimithigh.noiub.uab.frag.ialimitlow.0
dEQP-VK.draw.output_location.array.b8g8r8a8-unorm-mediump-output-vec3 dEQP-VK.draw.output_location.array.b8g8r8a8-unorm-mediump-output-vec3
dEQP-VK.glsl.linkage.varying.struct.mat3x2
dEQP-VK.graphicsfuzz.mat-array-deep-control-flow dEQP-VK.graphicsfuzz.mat-array-deep-control-flow
dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.negate_denorm_preserve dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.negate_denorm_preserve
dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.rounding_rtz_out_prod dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.rounding_rtz_out_prod

View File

@@ -65,14 +65,16 @@ create_input(struct ir3_context *ctx, unsigned compmask)
} }
static struct ir3_instruction * static struct ir3_instruction *
create_frag_input(struct ir3_context *ctx, bool use_ldlv, unsigned n) create_frag_input(struct ir3_context *ctx, struct ir3_instruction *coord, unsigned n)
{ {
struct ir3_block *block = ctx->block; struct ir3_block *block = ctx->block;
struct ir3_instruction *instr; struct ir3_instruction *instr;
/* packed inloc is fixed up later: */ /* packed inloc is fixed up later: */
struct ir3_instruction *inloc = create_immed(block, n); struct ir3_instruction *inloc = create_immed(block, n);
if (use_ldlv) { if (coord) {
instr = ir3_BARY_F(block, inloc, 0, coord, 0);
} else if (ctx->compiler->flat_bypass) {
instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0); instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0);
instr->cat6.type = TYPE_U32; instr->cat6.type = TYPE_U32;
instr->cat6.iim_val = 1; instr->cat6.iim_val = 1;
@@ -1342,7 +1344,6 @@ static void add_sysval_input_compmask(struct ir3_context *ctx,
so->inputs[n].sysval = true; so->inputs[n].sysval = true;
so->inputs[n].slot = slot; so->inputs[n].slot = slot;
so->inputs[n].compmask = compmask; so->inputs[n].compmask = compmask;
so->inputs[n].interpolate = INTERP_MODE_FLAT;
so->total_in++; so->total_in++;
} }
@@ -1471,6 +1472,9 @@ get_frag_coord(struct ir3_context *ctx, nir_intrinsic_instr *intr)
return ctx->frag_coord; return ctx->frag_coord;
} }
static void setup_input(struct ir3_context *ctx, nir_intrinsic_instr *intr);
static void setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr);
static void static void
emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{ {
@@ -1479,7 +1483,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction * const *src; struct ir3_instruction * const *src;
struct ir3_block *b = ctx->block; struct ir3_block *b = ctx->block;
unsigned dest_components = nir_intrinsic_dest_components(intr); unsigned dest_components = nir_intrinsic_dest_components(intr);
int idx, comp; int idx;
if (info->has_dest) { if (info->has_dest) {
dst = ir3_get_dst(ctx, &intr->dest, dest_components); dst = ir3_get_dst(ctx, &intr->dest, dest_components);
@@ -1658,43 +1662,8 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
emit_intrinsic_barycentric(ctx, intr, dst); emit_intrinsic_barycentric(ctx, intr, dst);
break; break;
case nir_intrinsic_load_interpolated_input: case nir_intrinsic_load_interpolated_input:
idx = nir_intrinsic_base(intr);
comp = nir_intrinsic_component(intr);
src = ir3_get_src(ctx, &intr->src[0]);
if (nir_src_is_const(intr->src[1])) {
struct ir3_instruction *coord = ir3_create_collect(ctx, src, 2);
idx += nir_src_as_uint(intr->src[1]);
for (int i = 0; i < dest_components; i++) {
unsigned inloc = idx * 4 + i + comp;
if (ctx->so->inputs[idx].bary &&
!ctx->so->inputs[idx].use_ldlv) {
dst[i] = ir3_BARY_F(b, create_immed(b, inloc), 0, coord, 0);
} else {
/* for non-varyings use the pre-setup input, since
* that is easier than mapping things back to a
* nir_variable to figure out what it is.
*/
dst[i] = ctx->inputs[inloc];
compile_assert(ctx, dst[i]);
}
}
} else {
ir3_context_error(ctx, "unhandled");
}
break;
case nir_intrinsic_load_input: case nir_intrinsic_load_input:
idx = nir_intrinsic_base(intr); setup_input(ctx, intr);
comp = nir_intrinsic_component(intr);
if (nir_src_is_const(intr->src[0])) {
idx += nir_src_as_uint(intr->src[0]);
for (int i = 0; i < dest_components; i++) {
unsigned n = idx * 4 + i + comp;
dst[i] = ctx->inputs[n];
compile_assert(ctx, ctx->inputs[n]);
}
} else {
ir3_context_error(ctx, "unhandled");
}
break; break;
/* All SSBO intrinsics should have been lowered by 'lower_io_offsets' /* All SSBO intrinsics should have been lowered by 'lower_io_offsets'
* pass and replaced by an ir3-specific version that adds the * pass and replaced by an ir3-specific version that adds the
@@ -1803,16 +1772,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
b = NULL; b = NULL;
break; break;
case nir_intrinsic_store_output: case nir_intrinsic_store_output:
idx = nir_intrinsic_base(intr); setup_output(ctx, intr);
comp = nir_intrinsic_component(intr);
compile_assert(ctx, nir_src_is_const(intr->src[1]));
idx += nir_src_as_uint(intr->src[1]);
src = ir3_get_src(ctx, &intr->src[0]);
for (int i = 0; i < nir_intrinsic_src_components(intr, 0); i++) {
unsigned n = idx * 4 + i + comp;
ctx->outputs[n] = src[i];
}
break; break;
case nir_intrinsic_load_base_vertex: case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_first_vertex: case nir_intrinsic_load_first_vertex:
@@ -2949,92 +2909,53 @@ emit_function(struct ir3_context *ctx, nir_function_impl *impl)
} }
static void static void
setup_input(struct ir3_context *ctx, nir_variable *in) setup_input(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{ {
struct ir3_shader_variant *so = ctx->so; struct ir3_shader_variant *so = ctx->so;
unsigned ncomp = glsl_get_components(in->type); struct ir3_instruction *coord = NULL;
unsigned n = in->data.driver_location;
unsigned frac = in->data.location_frac; if (intr->intrinsic == nir_intrinsic_load_interpolated_input)
unsigned slot = in->data.location; coord = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), 2);
compile_assert(ctx, nir_src_is_const(intr->src[coord ? 1 : 0]));
unsigned frac = nir_intrinsic_component(intr);
unsigned offset = nir_src_as_uint(intr->src[coord ? 1 : 0]);
unsigned ncomp = nir_intrinsic_dest_components(intr);
unsigned n = nir_intrinsic_base(intr) + offset;
unsigned slot = nir_intrinsic_io_semantics(intr).location + offset;
unsigned compmask; unsigned compmask;
/* Inputs are loaded using ldlw or ldg for these stages. */ /* Inputs are loaded using ldlw or ldg for other stages. */
if (ctx->so->type == MESA_SHADER_TESS_CTRL || compile_assert(ctx, ctx->so->type == MESA_SHADER_FRAGMENT ||
ctx->so->type == MESA_SHADER_TESS_EVAL || ctx->so->type == MESA_SHADER_VERTEX);
ctx->so->type == MESA_SHADER_GEOMETRY)
return;
/* skip unread inputs, we could end up with (for example), unsplit
* matrix/etc inputs in the case they are not read, so just silently
* skip these.
*/
if (ncomp > 4)
return;
if (ctx->so->type == MESA_SHADER_FRAGMENT) if (ctx->so->type == MESA_SHADER_FRAGMENT)
compmask = BITFIELD_MASK(ncomp) << frac; compmask = BITFIELD_MASK(ncomp) << frac;
else else
compmask = BITFIELD_MASK(ncomp + frac); compmask = BITFIELD_MASK(ncomp + frac);
/* remove any already set components */ /* for a4xx+ rasterflat */
compmask &= ~so->inputs[n].compmask; if (so->inputs[n].rasterflat && ctx->so->key.rasterflat)
if (!compmask) coord = NULL;
return;
so->total_in += util_bitcount(compmask & ~so->inputs[n].compmask);
so->inputs[n].slot = slot; so->inputs[n].slot = slot;
so->inputs[n].compmask |= compmask; so->inputs[n].compmask |= compmask;
so->inputs_count = MAX2(so->inputs_count, n + 1); so->inputs_count = MAX2(so->inputs_count, n + 1);
so->inputs[n].interpolate = in->data.interpolation; so->inputs[n].flat = !coord;
if (ctx->so->type == MESA_SHADER_FRAGMENT) { if (ctx->so->type == MESA_SHADER_FRAGMENT) {
compile_assert(ctx, slot != VARYING_SLOT_POS);
/* if any varyings have 'sample' qualifier, that triggers us so->inputs[n].bary = true;
* to run in per-sample mode:
*/
so->per_samp |= in->data.sample;
for (int i = 0; i < ncomp; i++) { for (int i = 0; i < ncomp; i++) {
struct ir3_instruction *instr = NULL;
unsigned idx = (n * 4) + i + frac; unsigned idx = (n * 4) + i + frac;
ctx->last_dst[i] = create_frag_input(ctx, coord, idx);
if (!(compmask & (1 << (i + frac))))
continue;
if (slot == VARYING_SLOT_POS) {
ir3_context_error(ctx, "fragcoord should be a sysval!\n");
} else {
/* detect the special case for front/back colors where
* we need to do flat vs smooth shading depending on
* rast state:
*/
if (in->data.interpolation == INTERP_MODE_NONE) {
switch (slot) {
case VARYING_SLOT_COL0:
case VARYING_SLOT_COL1:
case VARYING_SLOT_BFC0:
case VARYING_SLOT_BFC1:
so->inputs[n].rasterflat = true;
break;
default:
break;
}
}
if (ctx->compiler->flat_bypass) {
if ((so->inputs[n].interpolate == INTERP_MODE_FLAT) ||
(so->inputs[n].rasterflat && ctx->so->key.rasterflat))
so->inputs[n].use_ldlv = true;
}
so->inputs[n].bary = true;
instr = create_frag_input(ctx, so->inputs[n].use_ldlv, idx);
}
compile_assert(ctx, idx < ctx->ninputs && !ctx->inputs[idx]);
ctx->inputs[idx] = instr;
} }
} else if (ctx->so->type == MESA_SHADER_VERTEX) { } else {
struct ir3_instruction *input = NULL; struct ir3_instruction *input = NULL;
foreach_input (in, ctx->ir) { foreach_input (in, ctx->ir) {
@@ -3067,10 +2988,11 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
ir3_split_dest(ctx->block, &ctx->inputs[idx], input, i, 1); ir3_split_dest(ctx->block, &ctx->inputs[idx], input, i, 1);
} }
}
if (so->inputs[n].bary || (ctx->so->type == MESA_SHADER_VERTEX)) { for (int i = 0; i < ncomp; i++) {
so->total_in += util_bitcount(compmask); unsigned idx = (n * 4) + i + frac;
ctx->last_dst[i] = ctx->inputs[idx];
}
} }
} }
@@ -3173,14 +3095,18 @@ pack_inlocs(struct ir3_context *ctx)
} }
static void static void
setup_output(struct ir3_context *ctx, nir_variable *out) setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{ {
struct ir3_shader_variant *so = ctx->so; struct ir3_shader_variant *so = ctx->so;
unsigned slots = glsl_count_vec4_slots(out->type, false, false); nir_io_semantics io = nir_intrinsic_io_semantics(intr);
unsigned ncomp = glsl_get_components(glsl_without_array(out->type));
unsigned n = out->data.driver_location; compile_assert(ctx, nir_src_is_const(intr->src[1]));
unsigned frac = out->data.location_frac;
unsigned slot = out->data.location; unsigned offset = nir_src_as_uint(intr->src[1]);
unsigned n = nir_intrinsic_base(intr) + offset;
unsigned frac = nir_intrinsic_component(intr);
unsigned ncomp = nir_intrinsic_src_components(intr, 0);
unsigned slot = io.location + offset;
if (ctx->so->type == MESA_SHADER_FRAGMENT) { if (ctx->so->type == MESA_SHADER_FRAGMENT) {
switch (slot) { switch (slot) {
@@ -3197,7 +3123,7 @@ setup_output(struct ir3_context *ctx, nir_variable *out)
so->writes_stencilref = true; so->writes_stencilref = true;
break; break;
default: default:
slot += out->data.index; /* For dual-src blend */ slot += io.dual_source_blend_index; /* For dual-src blend */
if (slot >= FRAG_RESULT_DATA0) if (slot >= FRAG_RESULT_DATA0)
break; break;
ir3_context_error(ctx, "unknown FS output name: %s\n", ir3_context_error(ctx, "unknown FS output name: %s\n",
@@ -3236,41 +3162,41 @@ setup_output(struct ir3_context *ctx, nir_variable *out)
_mesa_shader_stage_to_string(ctx->so->type), _mesa_shader_stage_to_string(ctx->so->type),
gl_varying_slot_name(slot)); gl_varying_slot_name(slot));
} }
} else if (ctx->so->type == MESA_SHADER_TESS_CTRL) {
/* output lowered to buffer writes. */
return;
} else { } else {
ir3_context_error(ctx, "unknown shader type: %d\n", ctx->so->type); ir3_context_error(ctx, "unknown shader type: %d\n", ctx->so->type);
} }
so->outputs_count = out->data.driver_location + slots; so->outputs_count = MAX2(so->outputs_count, n + 1);
compile_assert(ctx, so->outputs_count < ARRAY_SIZE(so->outputs)); compile_assert(ctx, so->outputs_count < ARRAY_SIZE(so->outputs));
for (int i = 0; i < slots; i++) { so->outputs[n].slot = slot;
int slot_base = n + i;
so->outputs[slot_base].slot = slot + i;
for (int i = 0; i < ncomp; i++) { for (int i = 0; i < ncomp; i++) {
unsigned idx = (slot_base * 4) + i + frac; unsigned idx = (n * 4) + i + frac;
compile_assert(ctx, idx < ctx->noutputs); compile_assert(ctx, idx < ctx->noutputs);
ctx->outputs[idx] = create_immed(ctx->block, fui(0.0));
}
/* if varying packing doesn't happen, we could end up in a situation
* with "holes" in the output, and since the per-generation code that
* sets up varying linkage registers doesn't expect to have more than
* one varying per vec4 slot, pad the holes.
*
* Note that this should probably generate a performance warning of
* some sort.
*/
for (int i = 0; i < frac; i++) {
unsigned idx = (n * 4) + i;
if (!ctx->outputs[idx]) {
ctx->outputs[idx] = create_immed(ctx->block, fui(0.0)); ctx->outputs[idx] = create_immed(ctx->block, fui(0.0));
} }
}
/* if varying packing doesn't happen, we could end up in a situation struct ir3_instruction * const *src = ir3_get_src(ctx, &intr->src[0]);
* with "holes" in the output, and since the per-generation code that for (int i = 0; i < ncomp; i++) {
* sets up varying linkage registers doesn't expect to have more than unsigned idx = (n * 4) + i + frac;
* one varying per vec4 slot, pad the holes. ctx->outputs[idx] = src[i];
*
* Note that this should probably generate a performance warning of
* some sort.
*/
for (int i = 0; i < frac; i++) {
unsigned idx = (slot_base * 4) + i;
if (!ctx->outputs[idx]) {
ctx->outputs[idx] = create_immed(ctx->block, fui(0.0));
}
}
} }
} }
@@ -3279,6 +3205,35 @@ emit_instructions(struct ir3_context *ctx)
{ {
nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s); nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s);
/* some varying setup which can't be done in setup_input(): */
if (ctx->so->type == MESA_SHADER_FRAGMENT) {
nir_foreach_shader_in_variable (var, ctx->s) {
/* if any varyings have 'sample' qualifier, that triggers us
* to run in per-sample mode:
*/
if (var->data.sample)
ctx->so->per_samp = true;
/* set rasterflat flag for front/back color */
if (var->data.interpolation == INTERP_MODE_NONE) {
switch (var->data.location) {
case VARYING_SLOT_COL0:
case VARYING_SLOT_COL1:
case VARYING_SLOT_BFC0:
case VARYING_SLOT_BFC1:
ctx->so->inputs[var->data.driver_location].rasterflat = true;
break;
default:
break;
}
}
}
}
/* TODO: for GS/HS/DS, load_input isn't used. but ctx->s->num_inputs is non-zero
* likely the same for num_outputs in cases where store_output isn't used
*/
ctx->so->inputs_count = ctx->s->num_inputs;
ctx->ninputs = ctx->s->num_inputs * 4; ctx->ninputs = ctx->s->num_inputs * 4;
ctx->noutputs = ctx->s->num_outputs * 4; ctx->noutputs = ctx->s->num_outputs * 4;
ctx->inputs = rzalloc_array(ctx, struct ir3_instruction *, ctx->ninputs); ctx->inputs = rzalloc_array(ctx, struct ir3_instruction *, ctx->ninputs);
@@ -3303,11 +3258,6 @@ emit_instructions(struct ir3_context *ctx)
ctx->ij[IJ_PERSP_PIXEL] = create_input(ctx, 0x3); ctx->ij[IJ_PERSP_PIXEL] = create_input(ctx, 0x3);
} }
/* Setup inputs: */
nir_foreach_shader_in_variable (var, ctx->s) {
setup_input(ctx, var);
}
/* Defer add_sysval_input() stuff until after setup_inputs(), /* Defer add_sysval_input() stuff until after setup_inputs(),
* because sysvals need to be appended after varyings: * because sysvals need to be appended after varyings:
*/ */
@@ -3351,11 +3301,6 @@ emit_instructions(struct ir3_context *ctx)
break; break;
} }
/* Setup outputs: */
nir_foreach_shader_out_variable (var, ctx->s) {
setup_output(ctx, var);
}
/* Find # of samplers. Just assume that we'll be reading from images.. if /* Find # of samplers. Just assume that we'll be reading from images.. if
* it is write-only we don't have to count it, but after lowering derefs * it is write-only we don't have to count it, but after lowering derefs
* is too late to compact indices for that. * is too late to compact indices for that.

View File

@@ -178,7 +178,6 @@ static void add_sysval(unsigned reg, unsigned compmask, gl_system_value sysval)
variant->inputs[n].sysval = true; variant->inputs[n].sysval = true;
variant->inputs[n].slot = sysval; variant->inputs[n].slot = sysval;
variant->inputs[n].compmask = compmask; variant->inputs[n].compmask = compmask;
variant->inputs[n].interpolate = INTERP_MODE_FLAT;
variant->total_in++; variant->total_in++;
} }

View File

@@ -588,9 +588,8 @@ struct ir3_shader_variant {
/* fragment shader specific: */ /* fragment shader specific: */
bool bary : 1; /* fetched varying (vs one loaded into reg) */ bool bary : 1; /* fetched varying (vs one loaded into reg) */
bool rasterflat : 1; /* special handling for emit->rasterflat */ bool rasterflat : 1; /* special handling for emit->rasterflat */
bool use_ldlv : 1; /* internal to ir3_compiler_nir */
bool half : 1; bool half : 1;
enum glsl_interp_mode interpolate; bool flat : 1;
} inputs[32 + 2]; /* +POSITION +FACE */ } inputs[32 + 2]; /* +POSITION +FACE */
/* sum of input components (scalar). For frag shaders, it only counts /* sum of input components (scalar). For frag shaders, it only counts

View File

@@ -1069,8 +1069,7 @@ tu6_vpc_varying_mode(const struct ir3_shader_variant *fs,
*interp_mode |= INTERP_ONE << 6; *interp_mode |= INTERP_ONE << 6;
shift += 2; shift += 2;
} }
} else if ((fs->inputs[index].interpolate == INTERP_MODE_FLAT) || } else if (fs->inputs[index].flat) {
fs->inputs[index].rasterflat) {
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
if (compmask & (1 << i)) { if (compmask & (1 << i)) {
*interp_mode |= INTERP_FLAT << shift; *interp_mode |= INTERP_FLAT << shift;

View File

@@ -361,7 +361,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
uint32_t inloc = fp->inputs[j].inloc; uint32_t inloc = fp->inputs[j].inloc;
if ((fp->inputs[j].interpolate == INTERP_MODE_FLAT) || if (fp->inputs[j].flat ||
(fp->inputs[j].rasterflat && emit->rasterflat)) { (fp->inputs[j].rasterflat && emit->rasterflat)) {
uint32_t loc = inloc; uint32_t loc = inloc;

View File

@@ -465,7 +465,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
uint32_t inloc = s[FS].v->inputs[j].inloc; uint32_t inloc = s[FS].v->inputs[j].inloc;
if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) || if (s[FS].v->inputs[j].flat ||
(s[FS].v->inputs[j].rasterflat && emit->rasterflat)) { (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
uint32_t loc = inloc; uint32_t loc = inloc;

View File

@@ -611,7 +611,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
uint32_t inloc = s[FS].v->inputs[j].inloc; uint32_t inloc = s[FS].v->inputs[j].inloc;
if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) || if (s[FS].v->inputs[j].flat ||
(s[FS].v->inputs[j].rasterflat && emit->rasterflat)) { (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
uint32_t loc = inloc; uint32_t loc = inloc;

View File

@@ -940,7 +940,7 @@ emit_interp_state(struct fd_ringbuffer *ring, struct ir3_shader_variant *fs,
uint32_t inloc = fs->inputs[j].inloc; uint32_t inloc = fs->inputs[j].inloc;
if ((fs->inputs[j].interpolate == INTERP_MODE_FLAT) || if (fs->inputs[j].flat ||
(fs->inputs[j].rasterflat && rasterflat)) { (fs->inputs[j].rasterflat && rasterflat)) {
uint32_t loc = inloc; uint32_t loc = inloc;