v3d/compiler: implement load interpolated input intrinsics
The GLSL interpolateAt*() functions will be lowered to these intrinsics.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Acked-by: Eric Anholt <eric@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7155>
This commit is contained in:

committed by
Alejandro Piñeiro

parent
3ec165bce9
commit
442f48f27b
@@ -2200,6 +2200,145 @@ ntq_emit_store_output(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This implementation is based on v3d_sample_{x,y}_offset() from
|
||||
* v3d_sample_offset.h.
|
||||
*/
|
||||
static void
|
||||
ntq_get_sample_offset(struct v3d_compile *c, struct qreg sample_idx,
|
||||
struct qreg *sx, struct qreg *sy)
|
||||
{
|
||||
sample_idx = vir_ITOF(c, sample_idx);
|
||||
|
||||
struct qreg offset_x =
|
||||
vir_FADD(c, vir_uniform_f(c, -0.125f),
|
||||
vir_FMUL(c, sample_idx,
|
||||
vir_uniform_f(c, 0.5f)));
|
||||
vir_set_pf(vir_FCMP_dest(c, vir_nop_reg(),
|
||||
vir_uniform_f(c, 2.0f), sample_idx),
|
||||
V3D_QPU_PF_PUSHC);
|
||||
offset_x = vir_SEL(c, V3D_QPU_COND_IFA,
|
||||
vir_FSUB(c, offset_x, vir_uniform_f(c, 1.25f)),
|
||||
offset_x);
|
||||
|
||||
struct qreg offset_y =
|
||||
vir_FADD(c, vir_uniform_f(c, -0.375f),
|
||||
vir_FMUL(c, sample_idx,
|
||||
vir_uniform_f(c, 0.25f)));
|
||||
*sx = offset_x;
|
||||
*sy = offset_y;
|
||||
}
|
||||
|
||||
/**
|
||||
* This implementation is based on get_centroid_offset() from fep.c.
|
||||
*/
|
||||
static void
|
||||
ntq_get_barycentric_centroid(struct v3d_compile *c,
|
||||
struct qreg *out_x,
|
||||
struct qreg *out_y)
|
||||
{
|
||||
struct qreg sample_mask;
|
||||
if (c->output_sample_mask_index != -1)
|
||||
sample_mask = c->outputs[c->output_sample_mask_index];
|
||||
else
|
||||
sample_mask = vir_MSF(c);
|
||||
|
||||
struct qreg i0 = vir_uniform_ui(c, 0);
|
||||
struct qreg i1 = vir_uniform_ui(c, 1);
|
||||
struct qreg i2 = vir_uniform_ui(c, 2);
|
||||
struct qreg i3 = vir_uniform_ui(c, 3);
|
||||
struct qreg i4 = vir_uniform_ui(c, 4);
|
||||
struct qreg i8 = vir_uniform_ui(c, 8);
|
||||
|
||||
/* sN = TRUE if sample N enabled in sample mask, FALSE otherwise */
|
||||
struct qreg F = vir_uniform_ui(c, 0);
|
||||
struct qreg T = vir_uniform_ui(c, ~0);
|
||||
struct qreg s0 = vir_XOR(c, vir_AND(c, sample_mask, i1), i1);
|
||||
vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s0), V3D_QPU_PF_PUSHZ);
|
||||
s0 = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
|
||||
struct qreg s1 = vir_XOR(c, vir_AND(c, sample_mask, i2), i2);
|
||||
vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s1), V3D_QPU_PF_PUSHZ);
|
||||
s1 = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
|
||||
struct qreg s2 = vir_XOR(c, vir_AND(c, sample_mask, i4), i4);
|
||||
vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s2), V3D_QPU_PF_PUSHZ);
|
||||
s2 = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
|
||||
struct qreg s3 = vir_XOR(c, vir_AND(c, sample_mask, i8), i8);
|
||||
vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s3), V3D_QPU_PF_PUSHZ);
|
||||
s3 = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
|
||||
|
||||
/* sample_idx = s0 ? 0 : s2 ? 2 : s1 ? 1 : 3 */
|
||||
struct qreg sample_idx = i3;
|
||||
vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s1), V3D_QPU_PF_PUSHZ);
|
||||
sample_idx = vir_SEL(c, V3D_QPU_COND_IFNA, i1, sample_idx);
|
||||
vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s2), V3D_QPU_PF_PUSHZ);
|
||||
sample_idx = vir_SEL(c, V3D_QPU_COND_IFNA, i2, sample_idx);
|
||||
vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s0), V3D_QPU_PF_PUSHZ);
|
||||
sample_idx = vir_SEL(c, V3D_QPU_COND_IFNA, i0, sample_idx);
|
||||
|
||||
/* Get offset at selected sample index */
|
||||
struct qreg offset_x, offset_y;
|
||||
ntq_get_sample_offset(c, sample_idx, &offset_x, &offset_y);
|
||||
|
||||
/* Select pixel center [offset=(0,0)] if two opposing samples (or none)
|
||||
* are selected.
|
||||
*/
|
||||
struct qreg s0_and_s3 = vir_AND(c, s0, s3);
|
||||
struct qreg s1_and_s2 = vir_AND(c, s1, s2);
|
||||
|
||||
struct qreg use_center = vir_XOR(c, sample_mask, vir_uniform_ui(c, 0));
|
||||
vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), use_center), V3D_QPU_PF_PUSHZ);
|
||||
use_center = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
|
||||
use_center = vir_OR(c, use_center, s0_and_s3);
|
||||
use_center = vir_OR(c, use_center, s1_and_s2);
|
||||
|
||||
struct qreg zero = vir_uniform_f(c, 0.0f);
|
||||
vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), use_center), V3D_QPU_PF_PUSHZ);
|
||||
offset_x = vir_SEL(c, V3D_QPU_COND_IFNA, zero, offset_x);
|
||||
offset_y = vir_SEL(c, V3D_QPU_COND_IFNA, zero, offset_y);
|
||||
|
||||
*out_x = offset_x;
|
||||
*out_y = offset_y;
|
||||
}
|
||||
|
||||
static struct qreg
|
||||
ntq_emit_load_interpolated_input(struct v3d_compile *c,
|
||||
struct qreg p,
|
||||
struct qreg C,
|
||||
struct qreg offset_x,
|
||||
struct qreg offset_y,
|
||||
unsigned mode)
|
||||
{
|
||||
if (mode == INTERP_MODE_FLAT)
|
||||
return C;
|
||||
|
||||
struct qreg sample_offset_x =
|
||||
vir_FSUB(c, vir_FXCD(c), vir_ITOF(c, vir_XCD(c)));
|
||||
struct qreg sample_offset_y =
|
||||
vir_FSUB(c, vir_FYCD(c), vir_ITOF(c, vir_YCD(c)));
|
||||
|
||||
struct qreg scaleX =
|
||||
vir_FADD(c, vir_FSUB(c, vir_uniform_f(c, 0.5f), sample_offset_x),
|
||||
offset_x);
|
||||
struct qreg scaleY =
|
||||
vir_FADD(c, vir_FSUB(c, vir_uniform_f(c, 0.5f), sample_offset_y),
|
||||
offset_y);
|
||||
|
||||
struct qreg pInterp =
|
||||
vir_FADD(c, p, vir_FADD(c, vir_FMUL(c, vir_FDX(c, p), scaleX),
|
||||
vir_FMUL(c, vir_FDY(c, p), scaleY)));
|
||||
|
||||
if (mode == INTERP_MODE_NOPERSPECTIVE)
|
||||
return vir_FADD(c, pInterp, C);
|
||||
|
||||
struct qreg w = c->payload_w;
|
||||
struct qreg wInterp =
|
||||
vir_FADD(c, w, vir_FADD(c, vir_FMUL(c, vir_FDX(c, w), scaleX),
|
||||
vir_FMUL(c, vir_FDY(c, w), scaleY)));
|
||||
|
||||
return vir_FADD(c, vir_FMUL(c, pInterp, wInterp), C);
|
||||
}
|
||||
|
||||
static void
|
||||
ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||
{
|
||||
@@ -2526,6 +2665,94 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||
vir_FSUB(c, vir_FYCD(c), vir_ITOF(c, vir_YCD(c))));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_barycentric_at_offset:
|
||||
ntq_store_dest(c, &instr->dest, 0,
|
||||
vir_MOV(c, ntq_get_src(c, instr->src[0], 0)));
|
||||
ntq_store_dest(c, &instr->dest, 1,
|
||||
vir_MOV(c, ntq_get_src(c, instr->src[0], 1)));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_barycentric_pixel:
|
||||
ntq_store_dest(c, &instr->dest, 0, vir_uniform_f(c, 0.0f));
|
||||
ntq_store_dest(c, &instr->dest, 1, vir_uniform_f(c, 0.0f));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_barycentric_at_sample: {
|
||||
if (!c->fs_key->msaa) {
|
||||
ntq_store_dest(c, &instr->dest, 0, vir_uniform_f(c, 0.0f));
|
||||
ntq_store_dest(c, &instr->dest, 1, vir_uniform_f(c, 0.0f));
|
||||
return;
|
||||
}
|
||||
|
||||
struct qreg offset_x, offset_y;
|
||||
struct qreg sample_idx = ntq_get_src(c, instr->src[0], 0);
|
||||
ntq_get_sample_offset(c, sample_idx, &offset_x, &offset_y);
|
||||
|
||||
ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, offset_x));
|
||||
ntq_store_dest(c, &instr->dest, 1, vir_MOV(c, offset_y));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_barycentric_sample: {
|
||||
struct qreg offset_x =
|
||||
vir_FSUB(c, vir_FXCD(c), vir_ITOF(c, vir_XCD(c)));
|
||||
struct qreg offset_y =
|
||||
vir_FSUB(c, vir_FYCD(c), vir_ITOF(c, vir_YCD(c)));
|
||||
|
||||
ntq_store_dest(c, &instr->dest, 0,
|
||||
vir_FSUB(c, offset_x, vir_uniform_f(c, 0.5f)));
|
||||
ntq_store_dest(c, &instr->dest, 1,
|
||||
vir_FSUB(c, offset_y, vir_uniform_f(c, 0.5f)));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_barycentric_centroid: {
|
||||
struct qreg offset_x, offset_y;
|
||||
ntq_get_barycentric_centroid(c, &offset_x, &offset_y);
|
||||
ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, offset_x));
|
||||
ntq_store_dest(c, &instr->dest, 1, vir_MOV(c, offset_y));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_interpolated_input: {
|
||||
assert(nir_src_is_const(instr->src[1]));
|
||||
const uint32_t offset = nir_src_as_uint(instr->src[1]);
|
||||
|
||||
for (int i = 0; i < instr->num_components; i++) {
|
||||
const uint32_t input_idx =
|
||||
(nir_intrinsic_base(instr) + offset) * 4 +
|
||||
nir_intrinsic_component(instr) + i;
|
||||
|
||||
/* If we are not in MSAA or if we are not interpolating
|
||||
* a user varying, just return the pre-computed
|
||||
* interpolated input.
|
||||
*/
|
||||
if (!c->fs_key->msaa ||
|
||||
c->interp[input_idx].vp.file == QFILE_NULL) {
|
||||
ntq_store_dest(c, &instr->dest, i,
|
||||
vir_MOV(c, c->inputs[input_idx]));
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Otherwise compute interpolation at the specified
|
||||
* offset.
|
||||
*/
|
||||
struct qreg p = c->interp[input_idx].vp;
|
||||
struct qreg C = c->interp[input_idx].C;
|
||||
unsigned interp_mode = c->interp[input_idx].mode;
|
||||
|
||||
struct qreg offset_x = ntq_get_src(c, instr->src[0], 0);
|
||||
struct qreg offset_y = ntq_get_src(c, instr->src[0], 1);
|
||||
|
||||
struct qreg result =
|
||||
ntq_emit_load_interpolated_input(c, p, C,
|
||||
offset_x, offset_y,
|
||||
interp_mode);
|
||||
ntq_store_dest(c, &instr->dest, i, result);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
fprintf(stderr, "Unknown intrinsic: ");
|
||||
nir_print_instr(&instr->instr, stderr);
|
||||
|
Reference in New Issue
Block a user