intel/fs: Make per-sample and coarse dispatch tri-state

Whenever one of them is BRW_SOMETIMES, we depend on a dynamic flag pushed
in as a push constant.  In this case, we often have to do the calculation
both ways and SEL the result.  It's a bit more code but it decouples MSAA
from the shader key.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21094>
Author:       Jason Ekstrand
Author date:  2021-11-19 16:32:24 -06:00
Committed by: Marge Bot
Parent:       43ca7f4178
Commit:       d8dfd153c5

12 changed files with 349 additions and 80 deletions
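
The mechanics of the dynamic path, for context: when either tri-state field ends up BRW_SOMETIMES, the driver uploads a brw_wm_msaa_flags value as a push constant at wm_prog_data->msaa_flags_param and hands the same value to the state-setup helpers changed below.  Every caller touched in this commit still passes 0, i.e. keeps the static behaviour.  A rough sketch of the driver-side computation, with hypothetical parameter names that are not part of this commit:

    /* Illustrative only: how a driver could build the dynamic MSAA flags it
     * pushes when the compiled shader has persample/coarse == BRW_SOMETIMES.
     * The inputs (fb_samples, persample_requested, coarse_requested) are
     * hypothetical.
     */
    static enum brw_wm_msaa_flags
    compute_wm_msaa_flags(const struct brw_wm_prog_data *wm_prog_data,
                          unsigned fb_samples, bool persample_requested,
                          bool coarse_requested)
    {
       enum brw_wm_msaa_flags flags = BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC;

       if (fb_samples > 1)
          flags |= BRW_WM_MSAA_FLAG_MULTISAMPLE_FBO;

       /* Per-sample dispatch needs a multisampled FBO and a shader compiled
        * with persample_dispatch != BRW_NEVER; sample shading in the shader
        * forces it on.
        */
       if (fb_samples > 1 && wm_prog_data->persample_dispatch != BRW_NEVER &&
           (persample_requested || wm_prog_data->sample_shading))
          flags |= BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH;

       /* Coarse and per-sample dispatch are mutually exclusive. */
       if (coarse_requested && wm_prog_data->coarse_pixel_dispatch != BRW_NEVER &&
           !(flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH))
          flags |= BRW_WM_MSAA_FLAG_COARSE_DISPATCH;

       return flags;
    }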


@@ -6449,7 +6449,8 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
          intel_set_ps_dispatch_state(&ps, &batch->screen->devinfo,
                                      wm_prog_data,
-                                     ice->state.framebuffer.samples);
+                                     ice->state.framebuffer.samples,
+                                     0 /* msaa_flags */);
 
          ps.DispatchGRFStartRegisterForConstantSetupData0 =
             brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
@@ -6517,7 +6518,8 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
       psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
       psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
       psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
-      psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
+      psx.PixelShaderIsPerSample =
+         brw_wm_prog_data_is_persample(wm_prog_data, 0);
 
       /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
       if (wm_prog_data->uses_sample_mask)
@@ -7291,7 +7293,7 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
         else
            wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
 
-        if (wm_prog_data->persample_dispatch)
+        if (brw_wm_prog_data_is_persample(wm_prog_data, 0))
           wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
        else
          wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;


@@ -4820,7 +4820,8 @@ iris_store_fs_state(const struct intel_device_info *devinfo,
      psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
      psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
-      psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
+      psx.PixelShaderIsPerSample =
+         brw_wm_prog_data_is_persample(wm_prog_data, 0);
      psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
 
 #if GFX_VER >= 9
@@ -6274,7 +6275,8 @@ iris_upload_dirty_render_state(struct iris_context *ice,
      uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0};
      _iris_pack_command(batch, GENX(3DSTATE_PS), ps_state, ps) {
         intel_set_ps_dispatch_state(&ps, batch->screen->devinfo,
-                                     wm_prog_data, cso_fb->samples);
+                                     wm_prog_data, cso_fb->samples,
+                                     0 /* msaa_flags */);
 
         ps.DispatchGRFStartRegisterForConstantSetupData0 =
            brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);


@@ -899,7 +899,8 @@ blorp_emit_ps_config(struct blorp_batch *batch,
    if (prog_data) {
       intel_set_ps_dispatch_state(&ps, devinfo, prog_data,
-                                  params->num_samples);
+                                  params->num_samples,
+                                  0 /* msaa_flags */);
 
       ps.DispatchGRFStartRegisterForConstantSetupData0 =
          brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
@@ -981,7 +982,8 @@ blorp_emit_ps_config(struct blorp_batch *batch,
    if (prog_data) {
       intel_set_ps_dispatch_state(&ps, devinfo, prog_data,
-                                  params->num_samples);
+                                  params->num_samples,
+                                  0 /* msaa_flags */);
 
       ps.DispatchGRFStartRegisterForConstantSetupData0 =
          brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);


@@ -43,7 +43,8 @@ static inline void
 intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
                             const struct intel_device_info *devinfo,
                             const struct brw_wm_prog_data *prog_data,
-                            unsigned rasterization_samples)
+                            unsigned rasterization_samples,
+                            enum brw_wm_msaa_flags msaa_flags)
 {
    assert(rasterization_samples != 0);
@@ -72,7 +73,10 @@ intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
       enable_8 = false;
 #endif
 
-   if (prog_data->persample_dispatch) {
+   const bool is_persample_dispatch =
+      brw_wm_prog_data_is_persample(prog_data, msaa_flags);
+
+   if (is_persample_dispatch) {
       /* TGL PRMs, Volume 2d: Command Reference: Structures:
        *    3DSTATE_PS_BODY::32 Pixel Dispatch Enable:
        *
@@ -108,8 +112,7 @@ intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
     *
    * 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8.
    */
-   if (GFX_VER >= 9 && rasterization_samples == 16 &&
-       !prog_data->persample_dispatch) {
+   if (GFX_VER >= 9 && rasterization_samples == 16 && !is_persample_dispatch) {
      assert(enable_8 || enable_16);
      enable_32 = false;
    }
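
Note that every updated call site in this series passes 0 for the new msaa_flags argument, which makes brw_wm_prog_data_is_persample() fall back to the compile-time answer.  A caller that opts into dynamic MSAA would instead pass the very flags value it pushes to the shader, along the lines of (hypothetical variable names):

    /* Hypothetical dynamic caller: pushed_msaa_flags is the same value the
     * driver uploads at wm_prog_data->msaa_flags_param.
     */
    intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                fb_samples, pushed_msaa_flags);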


@@ -28,6 +28,7 @@
 #include "c11/threads.h"
 #include "dev/intel_device_info.h"
 #include "util/macros.h"
+#include "util/enum_operators.h"
 #include "util/ralloc.h"
 #include "util/u_math.h"
 #include "brw_isa_info.h"
@@ -464,6 +465,12 @@ enum brw_sometimes {
    BRW_ALWAYS
 };
 
+static inline enum brw_sometimes
+brw_sometimes_invert(enum brw_sometimes x)
+{
+   return (enum brw_sometimes)((int)BRW_ALWAYS - (int)x);
+}
+
 /** The program key for Fragment/Pixel Shaders. */
 struct brw_wm_prog_key {
    struct brw_base_prog_key base;
@@ -833,6 +840,29 @@ enum brw_pixel_shader_computed_depth_mode {
    BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
 };
 
+enum brw_wm_msaa_flags {
+   /** Must be set whenever any dynamic MSAA is used
+    *
+    * This flag mostly exists to let us assert that the driver understands
+    * dynamic MSAA so we don't run into trouble with drivers that don't.
+    */
+   BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC = (1 << 0),
+
+   /** True if the framebuffer is multisampled */
+   BRW_WM_MSAA_FLAG_MULTISAMPLE_FBO = (1 << 1),
+
+   /** True if this shader has been dispatched per-sample */
+   BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH = (1 << 2),
+
+   /** True if this shader has been dispatched coarse
+    *
+    * This is intentionally chose to be bit 18 to correspond to the coarse
+    * write bit in the FB write message descriptor.
+    */
+   BRW_WM_MSAA_FLAG_COARSE_DISPATCH = (1 << 18),
+};
+
+MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(enum brw_wm_msaa_flags)
+
 /* Data about a particular attempt to compile a program.  Note that
  * there can be many of these, each in a different GL state
  * corresponding to a different brw_wm_prog_key struct, with different
@@ -872,7 +902,6 @@ struct brw_wm_prog_data {
    bool dispatch_16;
    bool dispatch_32;
    bool dual_src_blend;
-   bool persample_dispatch;
    bool uses_pos_offset;
    bool uses_omask;
    bool uses_kill;
@@ -888,10 +917,23 @@ struct brw_wm_prog_data {
    bool contains_flat_varying;
    bool contains_noperspective_varying;
 
+   /** True if the shader wants sample shading
+    *
+    * This corresponds to whether or not a gl_SampleId, gl_SamplePosition, or
+    * a sample-qualified input are used in the shader.  It is independent of
+    * GL_MIN_SAMPLE_SHADING_VALUE in GL or minSampleShading in Vulkan.
+    */
+   bool sample_shading;
+
+   /** Should this shader be dispatched per-sample */
+   enum brw_sometimes persample_dispatch;
+
    /**
     * Shader is ran at the coarse pixel shading dispatch rate (3DSTATE_CPS).
     */
-   bool per_coarse_pixel_dispatch;
+   enum brw_sometimes coarse_pixel_dispatch;
+
+   unsigned msaa_flags_param;
 
    /**
     * Mask of which interpolation modes are required by the fragment shader.
@@ -1023,6 +1065,50 @@ _brw_wm_prog_data_reg_blocks(const struct brw_wm_prog_data *prog_data,
    _brw_wm_prog_data_reg_blocks(prog_data, \
       brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx))
 
+static inline bool
+brw_wm_prog_data_is_persample(const struct brw_wm_prog_data *prog_data,
+                              enum brw_wm_msaa_flags pushed_msaa_flags)
+{
+   if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) {
+      if (!(pushed_msaa_flags & BRW_WM_MSAA_FLAG_MULTISAMPLE_FBO))
+         return false;
+
+      if (prog_data->sample_shading)
+         assert(pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH);
+
+      if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH)
+         assert(prog_data->persample_dispatch != BRW_NEVER);
+      else
+         assert(prog_data->persample_dispatch != BRW_ALWAYS);
+
+      return (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH) != 0;
+   }
+
+   assert(prog_data->persample_dispatch == BRW_ALWAYS ||
+          prog_data->persample_dispatch == BRW_NEVER);
+
+   return prog_data->persample_dispatch;
+}
+
+static inline bool
+brw_wm_prog_data_is_coarse(const struct brw_wm_prog_data *prog_data,
+                           enum brw_wm_msaa_flags pushed_msaa_flags)
+{
+   if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) {
+      if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_COARSE_DISPATCH)
+         assert(prog_data->coarse_pixel_dispatch != BRW_NEVER);
+      else
+         assert(prog_data->coarse_pixel_dispatch != BRW_ALWAYS);
+
+      return pushed_msaa_flags & BRW_WM_MSAA_FLAG_COARSE_DISPATCH;
+   }
+
+   assert(prog_data->coarse_pixel_dispatch == BRW_ALWAYS ||
+          prog_data->coarse_pixel_dispatch == BRW_NEVER);
+
+   return prog_data->coarse_pixel_dispatch;
+}
+
 struct brw_push_const_block {
    unsigned dwords; /* Dword count, not reg aligned */
    unsigned regs;
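
A quick, purely illustrative check of how the tri-state helpers above fit together.  It assumes the pre-existing enum ordering BRW_NEVER < BRW_SOMETIMES < BRW_ALWAYS starting at 0, which is what the subtraction in brw_sometimes_invert() relies on, plus <assert.h> and this header:

    static void
    brw_sometimes_examples(const struct brw_wm_prog_data *prog_data)
    {
       /* brw_sometimes_invert() mirrors the enum around BRW_SOMETIMES. */
       assert(brw_sometimes_invert(BRW_NEVER) == BRW_ALWAYS);
       assert(brw_sometimes_invert(BRW_SOMETIMES) == BRW_SOMETIMES);
       assert(brw_sometimes_invert(BRW_ALWAYS) == BRW_NEVER);

       /* Without BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC the pushed flags are ignored
        * and the compile-time answer must already be exact.
        */
       if (prog_data->persample_dispatch != BRW_SOMETIMES)
          (void)brw_wm_prog_data_is_persample(prog_data, 0);

       /* With the dynamic bit set, the pushed bits decide. */
       const enum brw_wm_msaa_flags dyn = BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC |
                                          BRW_WM_MSAA_FLAG_MULTISAMPLE_FBO |
                                          BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH;
       if (prog_data->persample_dispatch != BRW_NEVER)
          assert(brw_wm_prog_data_is_persample(prog_data, dyn));
    }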


@@ -1220,7 +1220,7 @@ fs_visitor::emit_samplepos_setup()
    const fs_builder abld = bld.annotate("compute sample position");
    fs_reg pos = abld.vgrf(BRW_REGISTER_TYPE_F, 2);
 
-   if (!wm_prog_data->persample_dispatch) {
+   if (wm_prog_data->persample_dispatch == BRW_NEVER) {
       /* From ARB_sample_shading specification:
        * "When rendering to a non-multisample buffer, or if multisample
        *  rasterization is disabled, gl_SamplePosition will always be
@@ -1255,6 +1255,16 @@ fs_visitor::emit_samplepos_setup()
       abld.MUL(offset(pos, abld, i), tmp_f, brw_imm_f(1 / 16.0f));
    }
 
+   if (wm_prog_data->persample_dispatch == BRW_SOMETIMES) {
+      check_dynamic_msaa_flag(abld, wm_prog_data,
+                              BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH);
+      for (unsigned i = 0; i < 2; i++) {
+         set_predicate(BRW_PREDICATE_NORMAL,
+                       bld.SEL(offset(pos, abld, i), offset(pos, abld, i),
+                               brw_imm_f(0.5f)));
+      }
+   }
+
    return pos;
 }
@@ -1369,12 +1379,12 @@ fs_visitor::emit_samplemaskin_setup()
    assert(devinfo->ver >= 6);
 
    /* The HW doesn't provide us with expected values. */
-   assert(!wm_prog_data->per_coarse_pixel_dispatch);
+   assert(wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS);
 
    fs_reg coverage_mask =
      fetch_payload_reg(bld, fs_payload().sample_mask_in_reg, BRW_REGISTER_TYPE_D);
 
-   if (!wm_prog_data->persample_dispatch)
+   if (wm_prog_data->persample_dispatch == BRW_NEVER)
       return coverage_mask;
 
    /* gl_SampleMaskIn[] comes from two sources:  the input coverage mask,
@@ -1399,6 +1409,13 @@ fs_visitor::emit_samplemaskin_setup()
    fs_reg mask = bld.vgrf(BRW_REGISTER_TYPE_D);
    abld.AND(mask, enabled_mask, coverage_mask);
 
+   if (wm_prog_data->persample_dispatch == BRW_ALWAYS)
+      return mask;
+
+   check_dynamic_msaa_flag(abld, wm_prog_data,
+                           BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH);
+   set_predicate(BRW_PREDICATE_NORMAL, abld.SEL(mask, mask, coverage_mask));
+
    return mask;
 }
@@ -1413,7 +1430,7 @@ fs_visitor::emit_shading_rate_setup()
    /* Coarse pixel shading size fields overlap with other fields of not in
    * coarse pixel dispatch mode, so report 0 when that's not the case.
    */
-   if (!wm_prog_data->per_coarse_pixel_dispatch)
+   if (wm_prog_data->coarse_pixel_dispatch == BRW_NEVER)
      return brw_imm_ud(0);
 
    const fs_builder abld = bld.annotate("compute fragment shading rate");
@@ -1439,6 +1456,13 @@ fs_visitor::emit_shading_rate_setup()
    fs_reg rate = abld.vgrf(BRW_REGISTER_TYPE_UD);
    abld.OR(rate, int_rate_x, int_rate_y);
 
+   if (wm_prog_data->coarse_pixel_dispatch == BRW_ALWAYS)
+      return rate;
+
+   check_dynamic_msaa_flag(abld, wm_prog_data,
+                           BRW_WM_MSAA_FLAG_COARSE_DISPATCH);
+   set_predicate(BRW_PREDICATE_NORMAL, abld.SEL(rate, rate, brw_imm_ud(0)));
+
    return rate;
 }
@@ -7256,10 +7280,14 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
    prog_data->computed_stencil =
       shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
 
-   prog_data->persample_dispatch =
-      key->multisample_fbo &&
-      (key->persample_interp ||
-       shader->info.fs.uses_sample_shading);
+   prog_data->sample_shading =
+      shader->info.fs.uses_sample_shading ||
+      shader->info.outputs_read;
+
+   prog_data->persample_dispatch = BRW_NEVER;
+   if (key->multisample_fbo &&
+       (key->persample_interp || prog_data->sample_shading))
+      prog_data->persample_dispatch = BRW_ALWAYS;
 
    if (devinfo->ver >= 6) {
       prog_data->uses_sample_mask =
@@ -7274,7 +7302,8 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
       * per-sample dispatch.  If we need gl_SamplePosition and we don't have
       * persample dispatch, we hard-code it to 0.5.
       */
-      prog_data->uses_pos_offset = prog_data->persample_dispatch &&
+      prog_data->uses_pos_offset =
+         prog_data->persample_dispatch != BRW_NEVER &&
         (BITSET_TEST(shader->info.system_values_read,
                      SYSTEM_VALUE_SAMPLE_POS) ||
          BITSET_TEST(shader->info.system_values_read,
@@ -7295,13 +7324,16 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
    /* You can't be coarse and per-sample */
    assert(!key->coarse_pixel || !key->persample_interp);
-   prog_data->per_coarse_pixel_dispatch =
-      key->coarse_pixel &&
-      !shader->info.fs.uses_sample_shading &&
-      !prog_data->uses_omask &&
-      !prog_data->uses_sample_mask &&
-      (prog_data->computed_depth_mode == BRW_PSCDEPTH_OFF) &&
-      !prog_data->computed_stencil;
+   prog_data->coarse_pixel_dispatch =
+      brw_sometimes_invert(prog_data->persample_dispatch);
+   if (!key->coarse_pixel ||
+       prog_data->uses_omask ||
+       prog_data->sample_shading ||
+       prog_data->uses_sample_mask ||
+       (prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF) ||
+       prog_data->computed_stencil) {
+      prog_data->coarse_pixel_dispatch = BRW_NEVER;
+   }
 
    /* We choose to always enable VMask prior to XeHP, as it would cause
    * us to lose out on the eliminate_find_live_channel() optimization.
@@ -7309,16 +7341,16 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
    prog_data->uses_vmask = devinfo->verx10 < 125 ||
                            shader->info.fs.needs_quad_helper_invocations ||
                            shader->info.fs.needs_all_helper_invocations ||
-                           prog_data->per_coarse_pixel_dispatch;
+                           prog_data->coarse_pixel_dispatch != BRW_NEVER;
 
    prog_data->uses_src_w =
      BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
    prog_data->uses_src_depth =
      BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
-     !prog_data->per_coarse_pixel_dispatch;
+     prog_data->coarse_pixel_dispatch != BRW_ALWAYS;
    prog_data->uses_depth_w_coefficients =
      BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
-     prog_data->per_coarse_pixel_dispatch;
+     prog_data->coarse_pixel_dispatch != BRW_NEVER;
 
    calculate_urb_setup(devinfo, key, prog_data, shader, mue_map);
    brw_compute_flat_inputs(prog_data, shader);


@@ -721,6 +721,24 @@ namespace brw {
       return tmp;
    }
 
+   inline fs_reg
+   dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data)
+   {
+      return fs_reg(UNIFORM, wm_prog_data->msaa_flags_param,
+                    BRW_REGISTER_TYPE_UD);
+   }
+
+   inline void
+   check_dynamic_msaa_flag(const fs_builder &bld,
+                           const struct brw_wm_prog_data *wm_prog_data,
+                           enum brw_wm_msaa_flags flag)
+   {
+      fs_inst *inst = bld.AND(bld.null_reg_ud(),
+                              dynamic_msaa_flags(wm_prog_data),
+                              brw_imm_ud(flag));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+   }
+
    bool
    lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
 }
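
These two helpers carry the whole dynamic story in the backend: check_dynamic_msaa_flag() ANDs the pushed-flags uniform with the bit of interest into the null register and sets a .nz conditional mod, which loads the default flag register; a following SEL predicated with BRW_PREDICATE_NORMAL then picks the first source where the bit was set and the second where it was not.  A minimal sketch of the pattern the later hunks repeat (not part of the commit itself):

    /* Compute-both-then-SEL sketch; assumes the usual fs_visitor
     * environment (fs_builder bld, set_predicate(), etc.).
     */
    static fs_reg
    pick_by_msaa_flag(const fs_builder &bld,
                      const struct brw_wm_prog_data *wm_prog_data,
                      enum brw_wm_msaa_flags bit,
                      fs_reg if_set, fs_reg if_clear)
    {
       /* AND flags with 'bit' into the null reg; .nz loads the flag reg. */
       check_dynamic_msaa_flag(bld, wm_prog_data, bit);

       /* Predicated SEL: src0 where the bit was set, src1 elsewhere. */
       fs_reg result = bld.vgrf(if_set.type);
       set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(result, if_set, if_clear));
       return result;
    }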


@@ -291,10 +291,10 @@ fs_visitor::emit_interpolation_setup_gfx6()
    struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
 
-   fs_reg int_pixel_offset_x, int_pixel_offset_y; /* Used on Gen12HP+ */
-   fs_reg int_pixel_offset_xy; /* Used on Gen8+ */
-   fs_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
-   if (!wm_prog_data->per_coarse_pixel_dispatch) {
+   fs_reg int_sample_offset_x, int_sample_offset_y; /* Used on Gen12HP+ */
+   fs_reg int_sample_offset_xy; /* Used on Gen8+ */
+   fs_reg half_int_sample_offset_x, half_int_sample_offset_y;
+   if (wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS) {
       /* The thread payload only delivers subspan locations (ss0, ss1,
        * ss2, ...). Since subspans covers 2x2 pixels blocks, we need to
        * generate 4 pixel coordinates out of each subspan location. We do this
@@ -324,9 +324,9 @@ fs_visitor::emit_interpolation_setup_gfx6()
        * coordinates out of 2 subspans coordinates in a single ADD instruction
        * (twice the operation above).
        */
-      int_pixel_offset_xy = fs_reg(brw_imm_v(0x11001010));
-      half_int_pixel_offset_x = fs_reg(brw_imm_uw(0));
-      half_int_pixel_offset_y = fs_reg(brw_imm_uw(0));
+      int_sample_offset_xy = fs_reg(brw_imm_v(0x11001010));
+      half_int_sample_offset_x = fs_reg(brw_imm_uw(0));
+      half_int_sample_offset_y = fs_reg(brw_imm_uw(0));
 
       /* On Gfx12.5, because of regioning restrictions, the interpolation code
        * is slightly different and works off X & Y only inputs. The ordering
        * of the half bytes here is a bit odd, with each subspan replicated
@@ -336,9 +336,14 @@ fs_visitor::emit_interpolation_setup_gfx6()
        *  X offset:  0 0 1 0 0 0 1 0
        *  Y offset:  0 0 0 0 1 0 1 0
        */
-      int_pixel_offset_x = fs_reg(brw_imm_v(0x01000100));
-      int_pixel_offset_y = fs_reg(brw_imm_v(0x01010000));
-   } else {
+      int_sample_offset_x = fs_reg(brw_imm_v(0x01000100));
+      int_sample_offset_y = fs_reg(brw_imm_v(0x01010000));
+   }
+
+   fs_reg int_coarse_offset_x, int_coarse_offset_y; /* Used on Gen12HP+ */
+   fs_reg int_coarse_offset_xy; /* Used on Gen8+ */
+   fs_reg half_int_coarse_offset_x, half_int_coarse_offset_y;
+   if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER) {
       /* In coarse pixel dispatch we have to do the same ADD instruction that
        * we do in normal per pixel dispatch, except this time we're not adding
        * 1 in each direction, but instead the coarse pixel size.
@@ -354,34 +359,84 @@ fs_visitor::emit_interpolation_setup_gfx6()
         /* To build the array of half bytes we do and AND operation with the
          * right mask in X.
          */
-        int_pixel_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
-        dbld.AND(int_pixel_offset_x, byte_offset(r1_0, 0), brw_imm_v(0x0f000f00));
+        int_coarse_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+        dbld.AND(int_coarse_offset_x, byte_offset(r1_0, 0), brw_imm_v(0x0f000f00));
 
         /* And the right mask in Y. */
-        int_pixel_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
-        dbld.AND(int_pixel_offset_y, byte_offset(r1_0, 1), brw_imm_v(0x0f0f0000));
+        int_coarse_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+        dbld.AND(int_coarse_offset_y, byte_offset(r1_0, 1), brw_imm_v(0x0f0f0000));
      } else {
         /* To build the array of half bytes we do and AND operation with the
          * right mask in X.
          */
-        int_pixel_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
-        dbld.AND(int_pixel_offset_x, byte_offset(r1_0, 0), brw_imm_v(0x0000f0f0));
+        int_coarse_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+        dbld.AND(int_coarse_offset_x, byte_offset(r1_0, 0), brw_imm_v(0x0000f0f0));
 
         /* And the right mask in Y. */
-        int_pixel_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
-        dbld.AND(int_pixel_offset_y, byte_offset(r1_0, 1), brw_imm_v(0xff000000));
+        int_coarse_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+        dbld.AND(int_coarse_offset_y, byte_offset(r1_0, 1), brw_imm_v(0xff000000));
 
         /* Finally OR the 2 registers. */
-        int_pixel_offset_xy = dbld.vgrf(BRW_REGISTER_TYPE_UW);
-        dbld.OR(int_pixel_offset_xy, int_pixel_offset_x, int_pixel_offset_y);
+        int_coarse_offset_xy = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+        dbld.OR(int_coarse_offset_xy, int_coarse_offset_x, int_coarse_offset_y);
      }
 
-      /* Also compute the half pixel size used to center pixels. */
-      half_int_pixel_offset_x = bld.vgrf(BRW_REGISTER_TYPE_UW);
-      half_int_pixel_offset_y = bld.vgrf(BRW_REGISTER_TYPE_UW);
-      bld.SHR(half_int_pixel_offset_x, suboffset(r1_0, 0), brw_imm_ud(1));
-      bld.SHR(half_int_pixel_offset_y, suboffset(r1_0, 1), brw_imm_ud(1));
+      /* Also compute the half coarse size used to center coarses. */
+      half_int_coarse_offset_x = bld.vgrf(BRW_REGISTER_TYPE_UW);
+      half_int_coarse_offset_y = bld.vgrf(BRW_REGISTER_TYPE_UW);
+      bld.SHR(half_int_coarse_offset_x, suboffset(r1_0, 0), brw_imm_ud(1));
+      bld.SHR(half_int_coarse_offset_y, suboffset(r1_0, 1), brw_imm_ud(1));
+   }
+
+   fs_reg int_pixel_offset_x, int_pixel_offset_y; /* Used on Gen12HP+ */
+   fs_reg int_pixel_offset_xy; /* Used on Gen8+ */
+   fs_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
+   switch (wm_prog_data->coarse_pixel_dispatch) {
+   case BRW_NEVER:
+#define COPY_OFFSET_REG(prefix, suffix) \
+      prefix##_pixel_##suffix = prefix##_sample_##suffix;
+      COPY_OFFSET_REG(int, offset_x)
+      COPY_OFFSET_REG(int, offset_y)
+      COPY_OFFSET_REG(int, offset_xy)
+      COPY_OFFSET_REG(half_int, offset_x)
+      COPY_OFFSET_REG(half_int, offset_y)
+#undef COPY_OFFSET_REG
+      break;
+
+   case BRW_SOMETIMES:
+      check_dynamic_msaa_flag(bld, wm_prog_data,
+                              BRW_WM_MSAA_FLAG_COARSE_DISPATCH);
+#define COPY_OFFSET_REG(prefix, suffix)                           \
+      prefix##_pixel_##suffix = bld.vgrf(BRW_REGISTER_TYPE_UW);   \
+      bld.SEL(prefix##_pixel_##suffix,                            \
+              prefix##_coarse_##suffix,                           \
+              prefix##_pixel_##suffix);
+      COPY_OFFSET_REG(int, offset_x)
+      COPY_OFFSET_REG(int, offset_y)
+      COPY_OFFSET_REG(int, offset_xy)
+      COPY_OFFSET_REG(half_int, offset_x)
+      COPY_OFFSET_REG(half_int, offset_y)
+#undef COPY_OFFSET_REG
+      break;
+
+   case BRW_ALWAYS:
+#define COPY_OFFSET_REG(prefix, suffix) \
+      prefix##_pixel_##suffix = prefix##_coarse_##suffix;
+      COPY_OFFSET_REG(int, offset_x)
+      COPY_OFFSET_REG(int, offset_y)
+      COPY_OFFSET_REG(int, offset_xy)
+      COPY_OFFSET_REG(half_int, offset_x)
+      COPY_OFFSET_REG(half_int, offset_y)
+#undef COPY_OFFSET_REG
+      break;
    }
 
@@ -401,11 +456,15 @@ fs_visitor::emit_interpolation_setup_gfx6()
                fs_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)),
                int_pixel_offset_y);
 
-      if (wm_prog_data->per_coarse_pixel_dispatch) {
-         dbld.ADD(int_pixel_x, int_pixel_x,
-                  horiz_stride(half_int_pixel_offset_x, 0));
-         dbld.ADD(int_pixel_y, int_pixel_y,
-                  horiz_stride(half_int_pixel_offset_y, 0));
+      if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER) {
+         fs_inst *addx = dbld.ADD(int_pixel_x, int_pixel_x,
+                                  horiz_stride(half_int_pixel_offset_x, 0));
+         fs_inst *addy = dbld.ADD(int_pixel_y, int_pixel_y,
+                                  horiz_stride(half_int_pixel_offset_y, 0));
+
+         if (wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS) {
+            addx->predicate = BRW_PREDICATE_NORMAL;
+            addy->predicate = BRW_PREDICATE_NORMAL;
+         }
       }
 
       hbld.MOV(offset(pixel_x, hbld, i), horiz_stride(int_pixel_x, 2));
@@ -463,8 +522,8 @@ fs_visitor::emit_interpolation_setup_gfx6()
    }
 
    abld = bld.annotate("compute pos.z");
+   fs_reg coarse_z;
    if (wm_prog_data->uses_depth_w_coefficients) {
-      assert(!wm_prog_data->uses_src_depth);
       /* In coarse pixel mode, the HW doesn't interpolate Z coordinate
        * properly. In the same way we have to add the coarse pixel size to
        * pixels locations, here we recompute the Z value with 2 coefficients
@@ -501,9 +560,9 @@ fs_visitor::emit_interpolation_setup_gfx6()
      abld.MAD(float_pixel_x, float_pixel_x, brw_imm_f(0.5f), f_cps_width);
      abld.MAD(float_pixel_y, float_pixel_y, brw_imm_f(0.5f), f_cps_height);
 
-      this->pixel_z = abld.vgrf(BRW_REGISTER_TYPE_F);
-      abld.MAD(this->pixel_z, z_c0, z_cx, float_pixel_x);
-      abld.MAD(this->pixel_z, this->pixel_z, z_cy, float_pixel_y);
+      coarse_z = abld.vgrf(BRW_REGISTER_TYPE_F);
+      abld.MAD(coarse_z, z_c0, z_cx, float_pixel_x);
+      abld.MAD(coarse_z, coarse_z, z_cy, float_pixel_y);
    }
 
    if (wm_prog_data->uses_src_depth) {
@@ -511,6 +570,34 @@ fs_visitor::emit_interpolation_setup_gfx6()
      this->pixel_z = fetch_payload_reg(bld, fs_payload().source_depth_reg);
    }
 
+   if (wm_prog_data->uses_depth_w_coefficients ||
+       wm_prog_data->uses_src_depth) {
+      fs_reg sample_z = this->pixel_z;
+
+      switch (wm_prog_data->coarse_pixel_dispatch) {
+      case BRW_NEVER:
+         assert(wm_prog_data->uses_src_depth);
+         assert(!wm_prog_data->uses_depth_w_coefficients);
+         this->pixel_z = sample_z;
+         break;
+
+      case BRW_SOMETIMES:
+         assert(wm_prog_data->uses_src_depth);
+         assert(wm_prog_data->uses_depth_w_coefficients);
+         this->pixel_z = abld.vgrf(BRW_REGISTER_TYPE_F);
+
+         /* We re-use the check_dynamic_msaa_flag() call from above */
+         abld.SEL(this->pixel_z, coarse_z, sample_z);
+         break;
+
+      case BRW_ALWAYS:
+         assert(!wm_prog_data->uses_src_depth);
+         assert(wm_prog_data->uses_depth_w_coefficients);
+         this->pixel_z = coarse_z;
+         break;
+      }
+   }
+
    if (wm_prog_data->uses_src_w) {
      abld = bld.annotate("compute pos.w");
      this->pixel_w = fetch_payload_reg(abld, fs_payload().source_w_reg);


@@ -359,7 +359,18 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    inst->desc =
       (inst->group / 16) << 11 | /* rt slot group */
       brw_fb_write_desc(devinfo, inst->target, msg_ctl, inst->last_rt,
-                        prog_data->per_coarse_pixel_dispatch);
+                        0 /* coarse_write */);
+
+   fs_reg desc = brw_imm_ud(0);
+   if (prog_data->coarse_pixel_dispatch == BRW_ALWAYS) {
+      inst->desc |= (1 << 18);
+   } else if (prog_data->coarse_pixel_dispatch == BRW_SOMETIMES) {
+      STATIC_ASSERT(BRW_WM_MSAA_FLAG_COARSE_DISPATCH == (1 << 18));
+      const fs_builder &ubld = bld.exec_all().group(8, 0);
+      desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+      ubld.AND(desc, dynamic_msaa_flags(prog_data),
+               brw_imm_ud(BRW_WM_MSAA_FLAG_COARSE_DISPATCH));
+   }
 
    uint32_t ex_desc = 0;
    if (devinfo->ver >= 11) {
@@ -376,7 +387,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    inst->opcode = SHADER_OPCODE_SEND;
    inst->resize_sources(3);
    inst->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE;
-   inst->src[0] = brw_imm_ud(0);
+   inst->src[0] = desc;
    inst->src[1] = brw_imm_ud(0);
    inst->src[2] = payload;
    inst->mlen = regs_written(load);
@@ -2456,8 +2467,6 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
    fs_reg payload = brw_vec8_grf(0, 0);
    unsigned mlen = 1;
 
-   const fs_reg desc = inst->src[1];
-
    unsigned mode;
    switch (inst->opcode) {
    case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
@@ -2480,11 +2489,33 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
      unreachable("Invalid interpolator instruction");
    }
 
+   fs_reg desc = inst->src[1];
    uint32_t desc_imm =
      brw_pixel_interp_desc(devinfo, mode, inst->pi_noperspective,
-                            wm_prog_data->per_coarse_pixel_dispatch,
+                            false /* coarse_pixel_rate */,
                            inst->exec_size, inst->group);
 
+   if (wm_prog_data->coarse_pixel_dispatch == BRW_ALWAYS) {
+      desc_imm |= (1 << 15);
+   } else if (wm_prog_data->coarse_pixel_dispatch == BRW_SOMETIMES) {
+      fs_reg orig_desc = desc;
+      const fs_builder &ubld = bld.exec_all().group(8, 0);
+      desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+      ubld.AND(desc, dynamic_msaa_flags(wm_prog_data),
+               brw_imm_ud(BRW_WM_MSAA_FLAG_COARSE_DISPATCH));
+
+      /* The uniform is in bit 18 but we need it in bit 15 */
+      STATIC_ASSERT(BRW_WM_MSAA_FLAG_COARSE_DISPATCH == (1 << 18));
+      ubld.SHR(desc, desc, brw_imm_ud(3));
+
+      /* And, if it's AT_OFFSET, we might have a non-trivial descriptor */
+      if (orig_desc.file == IMM) {
+         desc_imm |= orig_desc.ud;
+      } else {
+         ubld.OR(desc, desc, orig_desc);
+      }
+   }
+
    assert(bld.shader->devinfo->ver >= 7);
    inst->opcode = SHADER_OPCODE_SEND;
    inst->sfid = GFX7_SFID_PIXEL_INTERPOLATOR;
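
One detail worth calling out in the interpolator hunk above: the coarse flag was deliberately placed at bit 18 so it can be ORed straight into the FB write descriptor, but the pixel interpolator message carries its coarse-rate bit at bit 15, hence the SHR by 3 on the dynamically built descriptor.  The relationship can be expressed as a compile-time check:

    /* Bit 18 (pushed coarse flag, FB-write coarse bit) shifted right by 3 is
     * bit 15, the coarse rate bit in the pixel interpolator descriptor.
     */
    STATIC_ASSERT((BRW_WM_MSAA_FLAG_COARSE_DISPATCH >> 3) == (1 << 15));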


@@ -1526,7 +1526,8 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
    anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
-                                  ms != NULL ? ms->rasterization_samples : 1);
+                                  ms != NULL ? ms->rasterization_samples : 1,
+                                  0 /* msaa_flags */);
 
      ps.KernelStartPointer0 = fs_bin->kernel.offset +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
@@ -1581,7 +1582,8 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
      ps.PixelShaderValid = true;
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
-      ps.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
+      ps.PixelShaderIsPerSample =
+         brw_wm_prog_data_is_persample(wm_prog_data, 0);
      ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
@@ -1603,7 +1605,7 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
      assert(!wm_prog_data->inner_coverage); /* Not available in SPIR-V */
      if (!wm_prog_data->uses_sample_mask)
        ps.InputCoverageMaskState = ICMS_NONE;
-      else if (wm_prog_data->per_coarse_pixel_dispatch)
+      else if (brw_wm_prog_data_is_coarse(wm_prog_data, 0))
        ps.InputCoverageMaskState = ICMS_NORMAL;
      else if (wm_prog_data->post_depth_coverage)
        ps.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
@@ -1613,13 +1615,14 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
 #if GFX_VER >= 11
      ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
        wm_prog_data->uses_depth_w_coefficients;
-      ps.PixelShaderIsPerCoarsePixel = wm_prog_data->per_coarse_pixel_dispatch;
+      ps.PixelShaderIsPerCoarsePixel = wm_prog_data->coarse_pixel_dispatch;
 #endif
 #if GFX_VERx10 >= 125
      /* TODO: We should only require this when the last geometry shader uses
       * a fragment shading rate that is not constant.
       */
-      ps.EnablePSDependencyOnCPsizeChange = wm_prog_data->per_coarse_pixel_dispatch;
+      ps.EnablePSDependencyOnCPsizeChange =
+         brw_wm_prog_data_is_coarse(wm_prog_data, 0);
 #endif
    }
 }


@@ -315,7 +315,8 @@ genX(emit_shading_rate)(struct anv_batch *batch,
                        const struct vk_fragment_shading_rate_state *fsr)
 {
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
-   const bool cps_enable = wm_prog_data && wm_prog_data->per_coarse_pixel_dispatch;
+   const bool cps_enable = wm_prog_data &&
+      brw_wm_prog_data_is_coarse(wm_prog_data, 0);
 
 #if GFX_VER == 11
   anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {


@@ -1605,7 +1605,7 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
         wm.PixelShaderKillsPixel;
 
      if (ms != NULL && ms->rasterization_samples > 1) {
-        if (wm_prog_data->persample_dispatch) {
+        if (brw_wm_prog_data_is_persample(wm_prog_data, 0)) {
          wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
        } else {
          wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
@@ -1671,7 +1671,8 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
    anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
-                                  ms != NULL ? ms->rasterization_samples : 1);
+                                  ms != NULL ? ms->rasterization_samples : 1,
+                                  0 /* msaa_flags */);
 
      ps.KernelStartPointer0 = fs_bin->kernel.offset +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
@@ -1739,7 +1740,8 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
      ps.PixelShaderValid = true;
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
-      ps.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
+      ps.PixelShaderIsPerSample =
+         brw_wm_prog_data_is_persample(wm_prog_data, 0);
      ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;