From 68027bd38e134f45d1fe8612c0c31e5379ed7435 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 16 Aug 2023 23:08:35 +0300 Subject: [PATCH] intel/fs: implement dynamic interpolation mode for dynamic persample shaders There is no restriction on querying per-sample positions from the interpolator in non-per-sample dispatch mode. But apparently that does not give us the expected values for fragment shaders compiled without per-sample dispatch knowledge (graphics pipeline libraries). So when per-sample dispatch is dynamic and we're doing at_sample interpolation, turn the interpolation back into at_offset at runtime when we detect that the fragment shader is not run per sample. Fixes a bunch of dEQP-GLES31.functional.shaders.multisample_interpolation.interpolate_at_sample.* tests. Signed-off-by: Lionel Landwerlin Fixes: d8dfd153c5 ("intel/fs: Make per-sample and coarse dispatch tri-state") Reviewed-by: Emma Anholt Part-of: --- .../compiler/brw_lower_logical_sends.cpp | 55 ++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 2405fb4f6f4..b7610a1347a 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -2597,9 +2597,18 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst, unreachable("Invalid interpolator instruction"); } + const bool dynamic_mode = + inst->opcode == FS_OPCODE_INTERPOLATE_AT_SAMPLE && + wm_prog_data->persample_dispatch == BRW_SOMETIMES; + fs_reg desc = inst->src[1]; uint32_t desc_imm = - brw_pixel_interp_desc(devinfo, mode, inst->pi_noperspective, + brw_pixel_interp_desc(devinfo, + /* Leave the mode at 0 if persample_dispatch is + * dynamic, it will be ORed in below. + */ + dynamic_mode ? 
0 : mode, + inst->pi_noperspective, false /* coarse_pixel_rate */, inst->exec_size, inst->group); @@ -2613,7 +2622,7 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst, ubld.AND(desc, dynamic_msaa_flags(wm_prog_data), brw_imm_ud(BRW_WM_MSAA_FLAG_COARSE_PI_MSG)); - /* And, if it's AT_OFFSET, we might have a non-trivial descriptor */ + /* And, if it's AT_OFFSET, we might have a non-trivial descriptor */ if (orig_desc.file == IMM) { desc_imm |= orig_desc.ud; } else { @@ -2621,6 +2630,48 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst, } } + /* If persample_dispatch is dynamic, select the interpolation mode + * dynamically and OR into the descriptor to complete the static part + * generated by brw_pixel_interp_desc(). + * + * Why does this work? If you look at the SKL PRMs, Volume 7: + * 3D-Media-GPGPU, Shared Functions Pixel Interpolater, you'll see that + * + * - "Per Message Offset” Message Descriptor + * - “Sample Position Offset” Message Descriptor + * + * have different formats. Fortunately, a fragment shader dispatched at + * pixel rate, will have gl_SampleID = 0 & gl_NumSamples = 1. So the value + * we pack in “Sample Position Offset” will be a 0 and will cover the X/Y + * components of "Per Message Offset”, which will give us the pixel offset 0x0. + */ + if (dynamic_mode) { + fs_reg orig_desc = desc; + const fs_builder &ubld = bld.exec_all().group(8, 0); + desc = ubld.vgrf(BRW_REGISTER_TYPE_UD); + + check_dynamic_msaa_flag(ubld, wm_prog_data, + BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH); + if (orig_desc.file == IMM) { + /* Not using SEL here because we would generate an instruction with 2 + * immediate sources which is not supported by HW. 
+ */ + set_predicate_inv(BRW_PREDICATE_NORMAL, false, + ubld.MOV(desc, brw_imm_ud(orig_desc.ud | + GFX7_PIXEL_INTERPOLATOR_LOC_SAMPLE << 12))); + set_predicate_inv(BRW_PREDICATE_NORMAL, true, + ubld.MOV(desc, brw_imm_ud(orig_desc.ud | + GFX7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET << 12))); + } else { + set_predicate_inv(BRW_PREDICATE_NORMAL, false, + ubld.OR(desc, orig_desc, + brw_imm_ud(GFX7_PIXEL_INTERPOLATOR_LOC_SAMPLE << 12))); + set_predicate_inv(BRW_PREDICATE_NORMAL, true, + ubld.OR(desc, orig_desc, + brw_imm_ud(GFX7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET << 12))); + } + } + assert(bld.shader->devinfo->ver >= 7); inst->opcode = SHADER_OPCODE_SEND; inst->sfid = GFX7_SFID_PIXEL_INTERPOLATOR;