intel/compiler: Convert wm_prog_key::persample_interp to a tri-state
This allows for the possibility that we may not know at compile time whether sample shading is enabled through the API. While we're here, also document exactly what this bit means so we don't confuse ourselves.

v2: Fix up coarse pixel values (Lionel)

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21094>
This commit is contained in:

committed by
Marge Bot

parent
d8dfd153c5
commit
5644011f06
@@ -498,7 +498,7 @@ struct brw_wm_prog_key {
|
||||
* us to run per-sample. Even when running per-sample due to gl_SampleID,
|
||||
* we may still interpolate unqualified inputs at the pixel center.
|
||||
*/
|
||||
bool persample_interp:1;
|
||||
enum brw_sometimes persample_interp:2;
|
||||
|
||||
bool multisample_fbo:1;
|
||||
enum brw_sometimes line_aa:2;
|
||||
@@ -507,7 +507,7 @@ struct brw_wm_prog_key {
|
||||
bool ignore_sample_mask_out:1;
|
||||
bool coarse_pixel:1;
|
||||
|
||||
uint64_t padding:58;
|
||||
uint64_t padding:57;
|
||||
};
|
||||
|
||||
struct brw_cs_prog_key {
|
||||
@@ -828,6 +828,10 @@ enum brw_barycentric_mode {
|
||||
BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE = 5,
|
||||
BRW_BARYCENTRIC_MODE_COUNT = 6
|
||||
};
|
||||
/** Bitmask covering every perspective barycentric mode: PIXEL, CENTROID and
 * SAMPLE.
 */
#define BRW_BARYCENTRIC_PERSPECTIVE_BITS \
   ((1 << BRW_BARYCENTRIC_PERSPECTIVE_PIXEL) | \
    (1 << BRW_BARYCENTRIC_PERSPECTIVE_CENTROID) | \
    (1 << BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE))
|
||||
#define BRW_BARYCENTRIC_NONPERSPECTIVE_BITS \
|
||||
((1 << BRW_BARYCENTRIC_NONPERSPECTIVE_PIXEL) | \
|
||||
(1 << BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID) | \
|
||||
@@ -854,6 +858,9 @@ enum brw_wm_msaa_flags {
|
||||
/** True if this shader has been dispatched per-sample */
|
||||
BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH = (1 << 2),
|
||||
|
||||
/** True if inputs should be interpolated per-sample by default */
|
||||
BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP = (1 << 3),
|
||||
|
||||
/** True if this shader has been dispatched coarse
|
||||
*
|
||||
* This is intentionally chosen to be bit 18 to correspond to the coarse
|
||||
@@ -1090,6 +1097,56 @@ brw_wm_prog_data_is_persample(const struct brw_wm_prog_data *prog_data,
|
||||
return prog_data->persample_dispatch;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
wm_prog_data_barycentric_modes(const struct brw_wm_prog_data *prog_data,
|
||||
enum brw_wm_msaa_flags pushed_msaa_flags)
|
||||
{
|
||||
uint32_t modes = prog_data->barycentric_interp_modes;
|
||||
|
||||
if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP) {
|
||||
assert(pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC);
|
||||
|
||||
assert(prog_data->persample_dispatch == BRW_ALWAYS ||
|
||||
(pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH));
|
||||
|
||||
/* Making dynamic per-sample interpolation work is a bit tricky. The
|
||||
* hardware will hang if SAMPLE is requested but per-sample dispatch is
|
||||
* not enabled. This means we can't preemptively add SAMPLE to the
|
||||
* barycentrics bitfield. Instead, we have to add it late and only
|
||||
* on-demand. Annoyingly, changing the number of barycentrics requested
|
||||
* changes the whole PS shader payload so we very much don't want to do
|
||||
* that. Instead, if the dynamic per-sample interpolation flag is set,
|
||||
* we check to see if SAMPLE was requested and, if not, replace the
|
||||
* highest barycentric bit in the [non]perspective grouping (CENTROID,
|
||||
* if it exists, else PIXEL) with SAMPLE. The shader will stomp all the
|
||||
* barycentrics in the shader with SAMPLE so it really doesn't matter
|
||||
* which one we replace. The important thing is that we keep the number
|
||||
* of barycentrics in each [non]perspective grouping the same.
|
||||
*/
|
||||
if ((modes & BRW_BARYCENTRIC_PERSPECTIVE_BITS) &&
|
||||
!(modes & BITFIELD_BIT(BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE))) {
|
||||
int sample_mode =
|
||||
util_last_bit(modes & BRW_BARYCENTRIC_PERSPECTIVE_BITS) - 1;
|
||||
assert(modes & BITFIELD_BIT(sample_mode));
|
||||
|
||||
modes &= ~BITFIELD_BIT(sample_mode);
|
||||
modes |= BITFIELD_BIT(BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE);
|
||||
}
|
||||
|
||||
if ((modes & BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) &&
|
||||
!(modes & BITFIELD_BIT(BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))) {
|
||||
int sample_mode =
|
||||
util_last_bit(modes & BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) - 1;
|
||||
assert(modes & BITFIELD_BIT(sample_mode));
|
||||
|
||||
modes &= ~BITFIELD_BIT(sample_mode);
|
||||
modes |= BITFIELD_BIT(BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE);
|
||||
}
|
||||
}
|
||||
|
||||
return modes;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
brw_wm_prog_data_is_coarse(const struct brw_wm_prog_data *prog_data,
|
||||
enum brw_wm_msaa_flags pushed_msaa_flags)
|
||||
|
@@ -7284,9 +7284,10 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
|
||||
shader->info.fs.uses_sample_shading ||
|
||||
shader->info.outputs_read;
|
||||
|
||||
prog_data->persample_dispatch = BRW_NEVER;
|
||||
if (key->multisample_fbo &&
|
||||
(key->persample_interp || prog_data->sample_shading))
|
||||
assert(key->multisample_fbo || key->persample_interp == BRW_NEVER);
|
||||
|
||||
prog_data->persample_dispatch = key->persample_interp;
|
||||
if (key->multisample_fbo && prog_data->sample_shading)
|
||||
prog_data->persample_dispatch = BRW_ALWAYS;
|
||||
|
||||
if (devinfo->ver >= 6) {
|
||||
|
@@ -289,6 +289,7 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
||||
this->pixel_x = vgrf(glsl_type::float_type);
|
||||
this->pixel_y = vgrf(glsl_type::float_type);
|
||||
|
||||
const struct brw_wm_prog_key *wm_key = (brw_wm_prog_key*) this->key;
|
||||
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
|
||||
|
||||
fs_reg int_sample_offset_x, int_sample_offset_y; /* Used on Gen12HP+ */
|
||||
@@ -394,48 +395,58 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
||||
fs_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
|
||||
switch (wm_prog_data->coarse_pixel_dispatch) {
|
||||
case BRW_NEVER:
|
||||
#define COPY_OFFSET_REG(prefix, suffix) \
|
||||
prefix##_pixel_##suffix = prefix##_sample_##suffix;
|
||||
|
||||
COPY_OFFSET_REG(int, offset_x)
|
||||
COPY_OFFSET_REG(int, offset_y)
|
||||
COPY_OFFSET_REG(int, offset_xy)
|
||||
COPY_OFFSET_REG(half_int, offset_x)
|
||||
COPY_OFFSET_REG(half_int, offset_y)
|
||||
|
||||
#undef COPY_OFFSET_REG
|
||||
int_pixel_offset_x = int_sample_offset_x;
|
||||
int_pixel_offset_y = int_sample_offset_y;
|
||||
int_pixel_offset_xy = int_sample_offset_xy;
|
||||
half_int_pixel_offset_x = half_int_sample_offset_x;
|
||||
half_int_pixel_offset_y = half_int_sample_offset_y;
|
||||
break;
|
||||
|
||||
case BRW_SOMETIMES:
|
||||
check_dynamic_msaa_flag(bld, wm_prog_data,
|
||||
case BRW_SOMETIMES: {
|
||||
const fs_builder dbld =
|
||||
abld.exec_all().group(MIN2(16, dispatch_width) * 2, 0);
|
||||
|
||||
check_dynamic_msaa_flag(dbld, wm_prog_data,
|
||||
BRW_WM_MSAA_FLAG_COARSE_DISPATCH);
|
||||
|
||||
#define COPY_OFFSET_REG(prefix, suffix) \
|
||||
prefix##_pixel_##suffix = bld.vgrf(BRW_REGISTER_TYPE_UW); \
|
||||
bld.SEL(prefix##_pixel_##suffix, \
|
||||
prefix##_coarse_##suffix, \
|
||||
prefix##_pixel_##suffix); \
|
||||
int_pixel_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||
set_predicate(BRW_PREDICATE_NORMAL,
|
||||
dbld.SEL(int_pixel_offset_x,
|
||||
int_coarse_offset_x,
|
||||
int_sample_offset_x));
|
||||
|
||||
COPY_OFFSET_REG(int, offset_x)
|
||||
COPY_OFFSET_REG(int, offset_y)
|
||||
COPY_OFFSET_REG(int, offset_xy)
|
||||
COPY_OFFSET_REG(half_int, offset_x)
|
||||
COPY_OFFSET_REG(half_int, offset_y)
|
||||
int_pixel_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||
set_predicate(BRW_PREDICATE_NORMAL,
|
||||
dbld.SEL(int_pixel_offset_y,
|
||||
int_coarse_offset_y,
|
||||
int_sample_offset_y));
|
||||
|
||||
#undef COPY_OFFSET_REG
|
||||
int_pixel_offset_xy = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||
set_predicate(BRW_PREDICATE_NORMAL,
|
||||
dbld.SEL(int_pixel_offset_xy,
|
||||
int_coarse_offset_xy,
|
||||
int_sample_offset_xy));
|
||||
|
||||
half_int_pixel_offset_x = bld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||
set_predicate(BRW_PREDICATE_NORMAL,
|
||||
bld.SEL(half_int_pixel_offset_x,
|
||||
half_int_coarse_offset_x,
|
||||
half_int_sample_offset_x));
|
||||
|
||||
half_int_pixel_offset_y = bld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||
set_predicate(BRW_PREDICATE_NORMAL,
|
||||
bld.SEL(half_int_pixel_offset_y,
|
||||
half_int_coarse_offset_y,
|
||||
half_int_sample_offset_y));
|
||||
break;
|
||||
}
|
||||
|
||||
case BRW_ALWAYS:
|
||||
#define COPY_OFFSET_REG(prefix, suffix) \
|
||||
prefix##_pixel_##suffix = prefix##_coarse_##suffix;
|
||||
|
||||
COPY_OFFSET_REG(int, offset_x)
|
||||
COPY_OFFSET_REG(int, offset_y)
|
||||
COPY_OFFSET_REG(int, offset_xy)
|
||||
COPY_OFFSET_REG(half_int, offset_x)
|
||||
COPY_OFFSET_REG(half_int, offset_y)
|
||||
|
||||
#undef COPY_OFFSET_REG
|
||||
int_pixel_offset_x = int_coarse_offset_x;
|
||||
int_pixel_offset_y = int_coarse_offset_y;
|
||||
int_pixel_offset_xy = int_coarse_offset_xy;
|
||||
half_int_pixel_offset_x = half_int_coarse_offset_x;
|
||||
half_int_pixel_offset_y = half_int_coarse_offset_y;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -605,6 +616,55 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
||||
abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
|
||||
}
|
||||
|
||||
if (wm_key->persample_interp == BRW_SOMETIMES) {
|
||||
assert(!devinfo->needs_unlit_centroid_workaround);
|
||||
|
||||
const fs_builder ubld = bld.exec_all().group(16, 0);
|
||||
bool loaded_flag = false;
|
||||
|
||||
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
|
||||
if (!(wm_prog_data->barycentric_interp_modes & BITFIELD_BIT(i)))
|
||||
continue;
|
||||
|
||||
/* The sample mode will always be the top bit set in the perspective
|
||||
* or non-perspective section. In the case where no SAMPLE mode was
|
||||
* requested, wm_prog_data_barycentric_modes() will swap out the top
|
||||
* mode for SAMPLE so this works regardless of whether SAMPLE was
|
||||
* requested or not.
|
||||
*/
|
||||
int sample_mode;
|
||||
if (BITFIELD_BIT(i) & BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) {
|
||||
sample_mode = util_last_bit(wm_prog_data->barycentric_interp_modes &
|
||||
BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) - 1;
|
||||
} else {
|
||||
sample_mode = util_last_bit(wm_prog_data->barycentric_interp_modes &
|
||||
BRW_BARYCENTRIC_PERSPECTIVE_BITS) - 1;
|
||||
}
|
||||
assert(wm_prog_data->barycentric_interp_modes &
|
||||
BITFIELD_BIT(sample_mode));
|
||||
|
||||
if (i == sample_mode)
|
||||
continue;
|
||||
|
||||
uint8_t *barys = fs_payload().barycentric_coord_reg[i];
|
||||
|
||||
uint8_t *sample_barys = fs_payload().barycentric_coord_reg[sample_mode];
|
||||
assert(barys[0] && sample_barys[0]);
|
||||
|
||||
if (!loaded_flag) {
|
||||
check_dynamic_msaa_flag(ubld, wm_prog_data,
|
||||
BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP);
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < dispatch_width / 8; j++) {
|
||||
fs_inst *mov =
|
||||
ubld.MOV(brw_vec8_grf(barys[j / 2] + (j % 2) * 2, 0),
|
||||
brw_vec8_grf(sample_barys[j / 2] + (j % 2) * 2, 0));
|
||||
mov->predicate = BRW_PREDICATE_NORMAL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
|
||||
this->delta_xy[i] = fetch_barycentric_reg(
|
||||
bld, fs_payload().barycentric_coord_reg[i]);
|
||||
|
@@ -555,7 +555,7 @@ brw_nir_lower_fs_inputs(nir_shader *nir,
|
||||
|
||||
if (!key->multisample_fbo) {
|
||||
nir_lower_single_sampled(nir);
|
||||
} else if (key->persample_interp) {
|
||||
} else if (key->persample_interp == BRW_ALWAYS) {
|
||||
nir_shader_instructions_pass(nir, lower_barycentric_per_sample,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
|
Reference in New Issue
Block a user