intel/compiler: Convert wm_prog_key::persample_interp to a tri-state

This allows for the possibility that we may not know at compile time if
sample shading is enabled through the API.  While we're here, also
document exactly what this bit means so we don't confuse ourselves.

v2: Fixup coarse pixel values (Lionel)

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21094>
This commit is contained in:
Jason Ekstrand
2021-11-19 16:34:19 -06:00
committed by Marge Bot
parent d8dfd153c5
commit 5644011f06
11 changed files with 173 additions and 49 deletions

View File

@@ -4843,8 +4843,9 @@ crocus_populate_fs_key(const struct crocus_context *ice,
key->flat_shade = rast->cso.flatshade &&
(info->inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1));
key->persample_interp = rast->cso.force_persample_interp;
key->multisample_fbo = rast->cso.multisample && fb->samples > 1;
key->persample_interp =
rast->cso.force_persample_interp ? BRW_ALWAYS : BRW_NEVER;
key->ignore_sample_mask_out = !key->multisample_fbo;
key->coherent_fb_fetch = false; // TODO: needed?

View File

@@ -159,7 +159,7 @@ iris_to_brw_fs_key(const struct iris_screen *screen,
.alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
.alpha_to_coverage = key->alpha_to_coverage,
.clamp_fragment_color = key->clamp_fragment_color,
.persample_interp = key->persample_interp,
.persample_interp = key->persample_interp ? BRW_ALWAYS : BRW_NEVER,
.multisample_fbo = key->multisample_fbo,
.force_dual_color_blend = key->force_dual_color_blend,
.coherent_fb_fetch = key->coherent_fb_fetch,

View File

@@ -6523,7 +6523,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
wm.StatisticsEnable = ice->state.statistics_counters_enabled;
wm.BarycentricInterpolationMode =
wm_prog_data->barycentric_interp_modes;
wm_prog_data_barycentric_modes(wm_prog_data, 0);
if (wm_prog_data->early_fragment_tests)
wm.EarlyDepthStencilControl = EDSC_PREPS;

View File

@@ -498,7 +498,7 @@ struct brw_wm_prog_key {
* us to run per-sample. Even when running per-sample due to gl_SampleID,
* we may still interpolate unqualified inputs at the pixel center.
*/
bool persample_interp:1;
enum brw_sometimes persample_interp:2;
bool multisample_fbo:1;
enum brw_sometimes line_aa:2;
@@ -507,7 +507,7 @@ struct brw_wm_prog_key {
bool ignore_sample_mask_out:1;
bool coarse_pixel:1;
uint64_t padding:58;
uint64_t padding:57;
};
struct brw_cs_prog_key {
@@ -828,6 +828,10 @@ enum brw_barycentric_mode {
BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE = 5,
BRW_BARYCENTRIC_MODE_COUNT = 6
};
#define BRW_BARYCENTRIC_PERSPECTIVE_BITS \
((1 << BRW_BARYCENTRIC_PERSPECTIVE_PIXEL) | \
(1 << BRW_BARYCENTRIC_PERSPECTIVE_CENTROID) | \
(1 << BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE))
#define BRW_BARYCENTRIC_NONPERSPECTIVE_BITS \
((1 << BRW_BARYCENTRIC_NONPERSPECTIVE_PIXEL) | \
(1 << BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID) | \
@@ -854,6 +858,9 @@ enum brw_wm_msaa_flags {
/** True if this shader has been dispatched per-sample */
BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH = (1 << 2),
/** True if inputs should be interpolated per-sample by default */
BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP = (1 << 3),
/** True if this shader has been dispatched coarse
*
* This is intentionally chosen to be bit 18 to correspond to the coarse
@@ -1090,6 +1097,56 @@ brw_wm_prog_data_is_persample(const struct brw_wm_prog_data *prog_data,
return prog_data->persample_dispatch;
}
/**
 * Within a single barycentric grouping, trade the highest requested mode
 * for that grouping's SAMPLE mode.
 *
 * \param modes       Bitfield of requested barycentric modes.
 * \param group_bits  Mask covering every mode of one grouping (all the
 *                    perspective modes, or all the non-perspective modes).
 * \param sample_bit  Bit of the SAMPLE mode that belongs to \p group_bits.
 *
 * \return \p modes unchanged when the grouping is unused or already
 *         requests SAMPLE; otherwise \p modes with the grouping's highest
 *         set bit cleared and \p sample_bit set, so the number of bits set
 *         inside the grouping stays the same.
 */
static inline uint32_t
wm_barycentric_group_force_sample(uint32_t modes, uint32_t group_bits,
                                  uint32_t sample_bit)
{
   const uint32_t requested = modes & group_bits;

   /* Grouping not in use, or SAMPLE was requested by the shader itself. */
   if (requested == 0 || (modes & sample_bit))
      return modes;

   const int top_mode = util_last_bit(requested) - 1;
   assert(modes & BITFIELD_BIT(top_mode));

   return (modes & ~BITFIELD_BIT(top_mode)) | sample_bit;
}

/**
 * Compute the barycentric interpolation mode bitfield to program for a
 * fragment shader, given the MSAA flags pushed to it.
 *
 * When BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP is not set this is simply
 * prog_data->barycentric_interp_modes.
 *
 * When it is set, SAMPLE barycentrics have to show up in every grouping
 * the shader uses.  They cannot be requested ahead of time: the hardware
 * hangs if SAMPLE is asked for without per-sample dispatch.  Growing the
 * set of requested barycentrics is not an option either, since that would
 * change the layout of the whole PS payload.  So, for each of the
 * perspective and non-perspective groupings that lacks SAMPLE, the highest
 * requested mode (CENTROID if present, else PIXEL) is swapped for SAMPLE.
 * The shader overwrites all of its barycentrics with the SAMPLE ones in
 * this case, so which mode gets replaced is irrelevant; only the per-group
 * count must be preserved.
 *
 * \param prog_data          Fragment shader program data.
 * \param pushed_msaa_flags  Dynamic MSAA flags pushed to the shader.
 *
 * \return Bitfield of barycentric modes, one bit per brw_barycentric_mode.
 */
static inline uint32_t
wm_prog_data_barycentric_modes(const struct brw_wm_prog_data *prog_data,
                               enum brw_wm_msaa_flags pushed_msaa_flags)
{
   uint32_t result = prog_data->barycentric_interp_modes;

   /* Static case: nothing to patch up. */
   if (!(pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP))
      return result;

   /* Dynamic per-sample interpolation only makes sense when the dynamic
    * flags are live and the shader really is dispatched per-sample.
    */
   assert(pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC);
   assert(prog_data->persample_dispatch == BRW_ALWAYS ||
          (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH));

   result = wm_barycentric_group_force_sample(
      result, BRW_BARYCENTRIC_PERSPECTIVE_BITS,
      BITFIELD_BIT(BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE));

   result = wm_barycentric_group_force_sample(
      result, BRW_BARYCENTRIC_NONPERSPECTIVE_BITS,
      BITFIELD_BIT(BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE));

   return result;
}
static inline bool
brw_wm_prog_data_is_coarse(const struct brw_wm_prog_data *prog_data,
enum brw_wm_msaa_flags pushed_msaa_flags)

View File

@@ -7284,9 +7284,10 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
shader->info.fs.uses_sample_shading ||
shader->info.outputs_read;
prog_data->persample_dispatch = BRW_NEVER;
if (key->multisample_fbo &&
(key->persample_interp || prog_data->sample_shading))
assert(key->multisample_fbo || key->persample_interp == BRW_NEVER);
prog_data->persample_dispatch = key->persample_interp;
if (key->multisample_fbo && prog_data->sample_shading)
prog_data->persample_dispatch = BRW_ALWAYS;
if (devinfo->ver >= 6) {

View File

@@ -289,6 +289,7 @@ fs_visitor::emit_interpolation_setup_gfx6()
this->pixel_x = vgrf(glsl_type::float_type);
this->pixel_y = vgrf(glsl_type::float_type);
const struct brw_wm_prog_key *wm_key = (brw_wm_prog_key*) this->key;
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
fs_reg int_sample_offset_x, int_sample_offset_y; /* Used on Gen12HP+ */
@@ -394,48 +395,58 @@ fs_visitor::emit_interpolation_setup_gfx6()
fs_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
switch (wm_prog_data->coarse_pixel_dispatch) {
case BRW_NEVER:
#define COPY_OFFSET_REG(prefix, suffix) \
prefix##_pixel_##suffix = prefix##_sample_##suffix;
COPY_OFFSET_REG(int, offset_x)
COPY_OFFSET_REG(int, offset_y)
COPY_OFFSET_REG(int, offset_xy)
COPY_OFFSET_REG(half_int, offset_x)
COPY_OFFSET_REG(half_int, offset_y)
#undef COPY_OFFSET_REG
int_pixel_offset_x = int_sample_offset_x;
int_pixel_offset_y = int_sample_offset_y;
int_pixel_offset_xy = int_sample_offset_xy;
half_int_pixel_offset_x = half_int_sample_offset_x;
half_int_pixel_offset_y = half_int_sample_offset_y;
break;
case BRW_SOMETIMES:
check_dynamic_msaa_flag(bld, wm_prog_data,
case BRW_SOMETIMES: {
const fs_builder dbld =
abld.exec_all().group(MIN2(16, dispatch_width) * 2, 0);
check_dynamic_msaa_flag(dbld, wm_prog_data,
BRW_WM_MSAA_FLAG_COARSE_DISPATCH);
#define COPY_OFFSET_REG(prefix, suffix) \
prefix##_pixel_##suffix = bld.vgrf(BRW_REGISTER_TYPE_UW); \
bld.SEL(prefix##_pixel_##suffix, \
prefix##_coarse_##suffix, \
prefix##_pixel_##suffix); \
int_pixel_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
set_predicate(BRW_PREDICATE_NORMAL,
dbld.SEL(int_pixel_offset_x,
int_coarse_offset_x,
int_sample_offset_x));
COPY_OFFSET_REG(int, offset_x)
COPY_OFFSET_REG(int, offset_y)
COPY_OFFSET_REG(int, offset_xy)
COPY_OFFSET_REG(half_int, offset_x)
COPY_OFFSET_REG(half_int, offset_y)
int_pixel_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
set_predicate(BRW_PREDICATE_NORMAL,
dbld.SEL(int_pixel_offset_y,
int_coarse_offset_y,
int_sample_offset_y));
#undef COPY_OFFSET_REG
int_pixel_offset_xy = dbld.vgrf(BRW_REGISTER_TYPE_UW);
set_predicate(BRW_PREDICATE_NORMAL,
dbld.SEL(int_pixel_offset_xy,
int_coarse_offset_xy,
int_sample_offset_xy));
half_int_pixel_offset_x = bld.vgrf(BRW_REGISTER_TYPE_UW);
set_predicate(BRW_PREDICATE_NORMAL,
bld.SEL(half_int_pixel_offset_x,
half_int_coarse_offset_x,
half_int_sample_offset_x));
half_int_pixel_offset_y = bld.vgrf(BRW_REGISTER_TYPE_UW);
set_predicate(BRW_PREDICATE_NORMAL,
bld.SEL(half_int_pixel_offset_y,
half_int_coarse_offset_y,
half_int_sample_offset_y));
break;
}
case BRW_ALWAYS:
#define COPY_OFFSET_REG(prefix, suffix) \
prefix##_pixel_##suffix = prefix##_coarse_##suffix;
COPY_OFFSET_REG(int, offset_x)
COPY_OFFSET_REG(int, offset_y)
COPY_OFFSET_REG(int, offset_xy)
COPY_OFFSET_REG(half_int, offset_x)
COPY_OFFSET_REG(half_int, offset_y)
#undef COPY_OFFSET_REG
int_pixel_offset_x = int_coarse_offset_x;
int_pixel_offset_y = int_coarse_offset_y;
int_pixel_offset_xy = int_coarse_offset_xy;
half_int_pixel_offset_x = half_int_coarse_offset_x;
half_int_pixel_offset_y = half_int_coarse_offset_y;
break;
}
@@ -605,6 +616,55 @@ fs_visitor::emit_interpolation_setup_gfx6()
abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
}
if (wm_key->persample_interp == BRW_SOMETIMES) {
assert(!devinfo->needs_unlit_centroid_workaround);
const fs_builder ubld = bld.exec_all().group(16, 0);
bool loaded_flag = false;
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
if (!(wm_prog_data->barycentric_interp_modes & BITFIELD_BIT(i)))
continue;
/* The sample mode will always be the top bit set in the perspective
* or non-perspective section. In the case where no SAMPLE mode was
* requested, wm_prog_data_barycentric_modes() will swap out the top
* mode for SAMPLE so this works regardless of whether SAMPLE was
* requested or not.
*/
int sample_mode;
if (BITFIELD_BIT(i) & BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) {
sample_mode = util_last_bit(wm_prog_data->barycentric_interp_modes &
BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) - 1;
} else {
sample_mode = util_last_bit(wm_prog_data->barycentric_interp_modes &
BRW_BARYCENTRIC_PERSPECTIVE_BITS) - 1;
}
assert(wm_prog_data->barycentric_interp_modes &
BITFIELD_BIT(sample_mode));
if (i == sample_mode)
continue;
uint8_t *barys = fs_payload().barycentric_coord_reg[i];
uint8_t *sample_barys = fs_payload().barycentric_coord_reg[sample_mode];
assert(barys[0] && sample_barys[0]);
if (!loaded_flag) {
check_dynamic_msaa_flag(ubld, wm_prog_data,
BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP);
}
for (unsigned j = 0; j < dispatch_width / 8; j++) {
fs_inst *mov =
ubld.MOV(brw_vec8_grf(barys[j / 2] + (j % 2) * 2, 0),
brw_vec8_grf(sample_barys[j / 2] + (j % 2) * 2, 0));
mov->predicate = BRW_PREDICATE_NORMAL;
}
}
}
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
this->delta_xy[i] = fetch_barycentric_reg(
bld, fs_payload().barycentric_coord_reg[i]);

View File

@@ -555,7 +555,7 @@ brw_nir_lower_fs_inputs(nir_shader *nir,
if (!key->multisample_fbo) {
nir_lower_single_sampled(nir);
} else if (key->persample_interp) {
} else if (key->persample_interp == BRW_ALWAYS) {
nir_shader_instructions_pass(nir, lower_barycentric_per_sample,
nir_metadata_block_index |
nir_metadata_dominance,

View File

@@ -556,8 +556,10 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
* harmless to compute it and then let dead-code take care of it.
*/
if (ms->rasterization_samples > 1) {
key->persample_interp = ms->sample_shading_enable &&
(ms->min_sample_shading * ms->rasterization_samples) > 1;
key->persample_interp =
(ms->sample_shading_enable &&
(ms->min_sample_shading * ms->rasterization_samples) > 1) ?
BRW_ALWAYS : BRW_NEVER;
key->multisample_fbo = true;
}

View File

@@ -1500,7 +1500,7 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
wm_prog_data->uses_kill;
wm.BarycentricInterpolationMode =
wm_prog_data->barycentric_interp_modes;
wm_prog_data_barycentric_modes(wm_prog_data, 0);
}
GENX(3DSTATE_WM_pack)(NULL, pipeline->gfx8.wm, &wm);
@@ -1615,7 +1615,8 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
#if GFX_VER >= 11
ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
wm_prog_data->uses_depth_w_coefficients;
ps.PixelShaderIsPerCoarsePixel = wm_prog_data->coarse_pixel_dispatch;
ps.PixelShaderIsPerCoarsePixel =
brw_wm_prog_data_is_coarse(wm_prog_data, 0);
#endif
#if GFX_VERx10 >= 125
/* TODO: We should only require this when the last geometry shader uses

View File

@@ -373,8 +373,10 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
* harmless to compute it and then let dead-code take care of it.
*/
if (ms->rasterization_samples > 1) {
key->persample_interp = ms->sample_shading_enable &&
(ms->min_sample_shading * ms->rasterization_samples) > 1;
key->persample_interp =
(ms->sample_shading_enable &&
(ms->min_sample_shading * ms->rasterization_samples) > 1) ?
BRW_ALWAYS : BRW_NEVER;
key->multisample_fbo = true;
}

View File

@@ -1580,7 +1580,7 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
#endif
wm.BarycentricInterpolationMode =
wm_prog_data->barycentric_interp_modes;
wm_prog_data_barycentric_modes(wm_prog_data, 0);
#if GFX_VER < 8
wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;