intel/fs/xe2+: Add ALU-based implementation of barycentric interpolation at a per-channel sample.

This implements a replacement for the previous implementation of
nir_intrinsic_load_barycentric_at_sample that relied on the Pixel
Interpolator shared function, since it's going to be removed from the
hardware from Xe2 onwards.

This implementation simply looks up the X/Y offsets of each sample
index in the table provided in the PS thread payload, using indirect
addressing, then does the actual interpolation by calling into
emit_pixel_interpolater_alu_at_offset() introduced in the previous
commit.

Note that, even though this is only immediately useful on Xe2+
platforms, there's no reason why it shouldn't work on earlier
platforms, as long as we have the sample X/Y offsets available in the
thread payload.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29847>
This commit is contained in:
Francisco Jerez
2024-06-20 18:51:06 -07:00
committed by Marge Bot
parent 95eec5a0dd
commit 79fa3eba11

View File

@@ -2161,6 +2161,63 @@ emit_pixel_interpolater_alu_at_offset(const fs_builder &bld,
}
}
/**
* Interpolate per-polygon barycentrics at a specified sample index,
* optionally using perspective-correct interpolation if requested.
* This is mostly useful as replacement for the PI shared function
* that existed on platforms prior to Xe2, but is expected to work on
* earlier platforms since we can get the required polygon setup
* information from the thread payload as far back as ICL.
*/
static void
emit_pixel_interpolater_alu_at_sample(const fs_builder &bld,
const fs_reg &dst,
const fs_reg &idx,
glsl_interp_mode interpolation)
{
const fs_thread_payload &payload = bld.shader->fs_payload();
const struct brw_wm_prog_data *wm_prog_data =
brw_wm_prog_data(bld.shader->prog_data);
const fs_builder ubld = bld.exec_all().group(16, 0);
const fs_reg sample_offs_xy = ubld.vgrf(BRW_TYPE_UD);
assert(wm_prog_data->uses_sample_offsets);
/* Interleave the X/Y coordinates of each sample in order to allow
* a single indirect look-up, by using a MOV for the 16 X
* coordinates, then another MOV for the 16 Y coordinates.
*/
for (unsigned i = 0; i < 2; i++) {
const fs_reg reg = retype(brw_vec16_grf(payload.sample_offsets_reg, 4 * i),
BRW_TYPE_UB);
ubld.MOV(subscript(sample_offs_xy, BRW_TYPE_UW, i), reg);
}
/* Use indirect addressing to fetch the X/Y offsets of the sample
* index provided for each channel.
*/
const fs_reg idx_b = bld.vgrf(BRW_TYPE_UD);
bld.MUL(idx_b, idx, brw_imm_ud(brw_type_size_bytes(BRW_TYPE_UD)));
const fs_reg off_xy = bld.vgrf(BRW_TYPE_UD);
bld.emit(SHADER_OPCODE_MOV_INDIRECT, off_xy, component(sample_offs_xy, 0),
idx_b, brw_imm_ud(16 * brw_type_size_bytes(BRW_TYPE_UD)));
/* Convert the selected fixed-point offsets to floating-point
* offsets.
*/
const fs_reg offs = bld.vgrf(BRW_TYPE_F, 2);
for (unsigned i = 0; i < 2; i++) {
const fs_reg tmp = bld.vgrf(BRW_TYPE_F);
bld.MOV(tmp, subscript(off_xy, BRW_TYPE_UW, i));
bld.MUL(tmp, tmp, brw_imm_f(0.0625));
bld.ADD(offset(offs, bld, i), tmp, brw_imm_f(-0.5));
}
/* Interpolate at the resulting offsets. */
emit_pixel_interpolater_alu_at_offset(bld, dst, offs, interpolation);
}
/**
* Computes 1 << x, given a D/UD register containing some value x.
*/
@@ -4233,35 +4290,43 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
const glsl_interp_mode interpolation =
(enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
fs_reg msg_data;
if (nir_src_is_const(instr->src[0])) {
msg_data = brw_imm_ud(nir_src_as_uint(instr->src[0]) << 4);
if (devinfo->ver >= 20) {
emit_pixel_interpolater_alu_at_sample(
bld, dest, retype(get_nir_src(ntb, instr->src[0]),
BRW_TYPE_UD),
interpolation);
} else {
const fs_reg sample_src = retype(get_nir_src(ntb, instr->src[0]),
BRW_TYPE_UD);
const fs_reg sample_id = bld.emit_uniformize(sample_src);
msg_data = component(bld.group(8, 0).vgrf(BRW_TYPE_UD), 0);
bld.exec_all().group(1, 0).SHL(msg_data, sample_id, brw_imm_ud(4u));
fs_reg msg_data;
if (nir_src_is_const(instr->src[0])) {
msg_data = brw_imm_ud(nir_src_as_uint(instr->src[0]) << 4);
} else {
const fs_reg sample_src = retype(get_nir_src(ntb, instr->src[0]),
BRW_TYPE_UD);
const fs_reg sample_id = bld.emit_uniformize(sample_src);
msg_data = component(bld.group(8, 0).vgrf(BRW_TYPE_UD), 0);
bld.exec_all().group(1, 0).SHL(msg_data, sample_id, brw_imm_ud(4u));
}
fs_reg flag_reg;
struct brw_wm_prog_key *wm_prog_key = (struct brw_wm_prog_key *) s.key;
if (wm_prog_key->multisample_fbo == BRW_SOMETIMES) {
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(s.prog_data);
check_dynamic_msaa_flag(bld.exec_all().group(8, 0),
wm_prog_data,
INTEL_MSAA_FLAG_MULTISAMPLE_FBO);
flag_reg = brw_flag_reg(0, 0);
}
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dest,
fs_reg(), /* src */
msg_data,
flag_reg,
interpolation);
}
fs_reg flag_reg;
struct brw_wm_prog_key *wm_prog_key = (struct brw_wm_prog_key *) s.key;
if (wm_prog_key->multisample_fbo == BRW_SOMETIMES) {
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(s.prog_data);
check_dynamic_msaa_flag(bld.exec_all().group(8, 0),
wm_prog_data,
INTEL_MSAA_FLAG_MULTISAMPLE_FBO);
flag_reg = brw_flag_reg(0, 0);
}
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dest,
fs_reg(), /* src */
msg_data,
flag_reg,
interpolation);
break;
}