intel/fs: Make per-sample and coarse dispatch tri-state
Whenever one of them is BRW_SOMETIMES, we depend on a dynamic flag pushed in as a push constant. In this case, we often have to do the calculation both ways and SEL the result. It's a bit more code but decouples MSAA from the shader key. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21094>
This commit is contained in:

committed by
Marge Bot

parent
43ca7f4178
commit
d8dfd153c5
@@ -6449,7 +6449,8 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
|
|||||||
|
|
||||||
intel_set_ps_dispatch_state(&ps, &batch->screen->devinfo,
|
intel_set_ps_dispatch_state(&ps, &batch->screen->devinfo,
|
||||||
wm_prog_data,
|
wm_prog_data,
|
||||||
ice->state.framebuffer.samples);
|
ice->state.framebuffer.samples,
|
||||||
|
0 /* msaa_flags */);
|
||||||
|
|
||||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
|
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
|
||||||
@@ -6517,7 +6518,8 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
|
|||||||
psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
|
psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
|
||||||
psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
|
psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
|
||||||
psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
|
psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
|
||||||
psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
|
psx.PixelShaderIsPerSample =
|
||||||
|
brw_wm_prog_data_is_persample(wm_prog_data, 0);
|
||||||
|
|
||||||
/* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
|
/* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
|
||||||
if (wm_prog_data->uses_sample_mask)
|
if (wm_prog_data->uses_sample_mask)
|
||||||
@@ -7291,7 +7293,7 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
|
|||||||
else
|
else
|
||||||
wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
|
wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
|
||||||
|
|
||||||
if (wm_prog_data->persample_dispatch)
|
if (brw_wm_prog_data_is_persample(wm_prog_data, 0))
|
||||||
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
|
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
|
||||||
else
|
else
|
||||||
wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
|
wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
|
||||||
|
@@ -4820,7 +4820,8 @@ iris_store_fs_state(const struct intel_device_info *devinfo,
|
|||||||
psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
|
psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
|
||||||
psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
|
psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
|
||||||
psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
|
psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
|
||||||
psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
|
psx.PixelShaderIsPerSample =
|
||||||
|
brw_wm_prog_data_is_persample(wm_prog_data, 0);
|
||||||
psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
|
psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
|
||||||
|
|
||||||
#if GFX_VER >= 9
|
#if GFX_VER >= 9
|
||||||
@@ -6274,7 +6275,8 @@ iris_upload_dirty_render_state(struct iris_context *ice,
|
|||||||
uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0};
|
uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0};
|
||||||
_iris_pack_command(batch, GENX(3DSTATE_PS), ps_state, ps) {
|
_iris_pack_command(batch, GENX(3DSTATE_PS), ps_state, ps) {
|
||||||
intel_set_ps_dispatch_state(&ps, batch->screen->devinfo,
|
intel_set_ps_dispatch_state(&ps, batch->screen->devinfo,
|
||||||
wm_prog_data, cso_fb->samples);
|
wm_prog_data, cso_fb->samples,
|
||||||
|
0 /* msaa_flags */);
|
||||||
|
|
||||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
|
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
|
||||||
|
@@ -899,7 +899,8 @@ blorp_emit_ps_config(struct blorp_batch *batch,
|
|||||||
|
|
||||||
if (prog_data) {
|
if (prog_data) {
|
||||||
intel_set_ps_dispatch_state(&ps, devinfo, prog_data,
|
intel_set_ps_dispatch_state(&ps, devinfo, prog_data,
|
||||||
params->num_samples);
|
params->num_samples,
|
||||||
|
0 /* msaa_flags */);
|
||||||
|
|
||||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
||||||
@@ -981,7 +982,8 @@ blorp_emit_ps_config(struct blorp_batch *batch,
|
|||||||
|
|
||||||
if (prog_data) {
|
if (prog_data) {
|
||||||
intel_set_ps_dispatch_state(&ps, devinfo, prog_data,
|
intel_set_ps_dispatch_state(&ps, devinfo, prog_data,
|
||||||
params->num_samples);
|
params->num_samples,
|
||||||
|
0 /* msaa_flags */);
|
||||||
|
|
||||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
||||||
|
@@ -43,7 +43,8 @@ static inline void
|
|||||||
intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
|
intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
|
||||||
const struct intel_device_info *devinfo,
|
const struct intel_device_info *devinfo,
|
||||||
const struct brw_wm_prog_data *prog_data,
|
const struct brw_wm_prog_data *prog_data,
|
||||||
unsigned rasterization_samples)
|
unsigned rasterization_samples,
|
||||||
|
enum brw_wm_msaa_flags msaa_flags)
|
||||||
{
|
{
|
||||||
assert(rasterization_samples != 0);
|
assert(rasterization_samples != 0);
|
||||||
|
|
||||||
@@ -72,7 +73,10 @@ intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
|
|||||||
enable_8 = false;
|
enable_8 = false;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (prog_data->persample_dispatch) {
|
const bool is_persample_dispatch =
|
||||||
|
brw_wm_prog_data_is_persample(prog_data, msaa_flags);
|
||||||
|
|
||||||
|
if (is_persample_dispatch) {
|
||||||
/* TGL PRMs, Volume 2d: Command Reference: Structures:
|
/* TGL PRMs, Volume 2d: Command Reference: Structures:
|
||||||
* 3DSTATE_PS_BODY::32 Pixel Dispatch Enable:
|
* 3DSTATE_PS_BODY::32 Pixel Dispatch Enable:
|
||||||
*
|
*
|
||||||
@@ -108,8 +112,7 @@ intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
|
|||||||
*
|
*
|
||||||
* 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8.
|
* 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8.
|
||||||
*/
|
*/
|
||||||
if (GFX_VER >= 9 && rasterization_samples == 16 &&
|
if (GFX_VER >= 9 && rasterization_samples == 16 && !is_persample_dispatch) {
|
||||||
!prog_data->persample_dispatch) {
|
|
||||||
assert(enable_8 || enable_16);
|
assert(enable_8 || enable_16);
|
||||||
enable_32 = false;
|
enable_32 = false;
|
||||||
}
|
}
|
||||||
|
@@ -28,6 +28,7 @@
|
|||||||
#include "c11/threads.h"
|
#include "c11/threads.h"
|
||||||
#include "dev/intel_device_info.h"
|
#include "dev/intel_device_info.h"
|
||||||
#include "util/macros.h"
|
#include "util/macros.h"
|
||||||
|
#include "util/enum_operators.h"
|
||||||
#include "util/ralloc.h"
|
#include "util/ralloc.h"
|
||||||
#include "util/u_math.h"
|
#include "util/u_math.h"
|
||||||
#include "brw_isa_info.h"
|
#include "brw_isa_info.h"
|
||||||
@@ -464,6 +465,12 @@ enum brw_sometimes {
|
|||||||
BRW_ALWAYS
|
BRW_ALWAYS
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static inline enum brw_sometimes
|
||||||
|
brw_sometimes_invert(enum brw_sometimes x)
|
||||||
|
{
|
||||||
|
return (enum brw_sometimes)((int)BRW_ALWAYS - (int)x);
|
||||||
|
}
|
||||||
|
|
||||||
/** The program key for Fragment/Pixel Shaders. */
|
/** The program key for Fragment/Pixel Shaders. */
|
||||||
struct brw_wm_prog_key {
|
struct brw_wm_prog_key {
|
||||||
struct brw_base_prog_key base;
|
struct brw_base_prog_key base;
|
||||||
@@ -833,6 +840,29 @@ enum brw_pixel_shader_computed_depth_mode {
|
|||||||
BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
|
BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum brw_wm_msaa_flags {
|
||||||
|
/** Must be set whenever any dynamic MSAA is used
|
||||||
|
*
|
||||||
|
* This flag mostly exists to let us assert that the driver understands
|
||||||
|
* dynamic MSAA so we don't run into trouble with drivers that don't.
|
||||||
|
*/
|
||||||
|
BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC = (1 << 0),
|
||||||
|
|
||||||
|
/** True if the framebuffer is multisampled */
|
||||||
|
BRW_WM_MSAA_FLAG_MULTISAMPLE_FBO = (1 << 1),
|
||||||
|
|
||||||
|
/** True if this shader has been dispatched per-sample */
|
||||||
|
BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH = (1 << 2),
|
||||||
|
|
||||||
|
/** True if this shader has been dispatched coarse
|
||||||
|
*
|
||||||
|
* This is intentionally chosen to be bit 18 to correspond to the coarse
|
||||||
|
* write bit in the FB write message descriptor.
|
||||||
|
*/
|
||||||
|
BRW_WM_MSAA_FLAG_COARSE_DISPATCH = (1 << 18),
|
||||||
|
};
|
||||||
|
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(enum brw_wm_msaa_flags)
|
||||||
|
|
||||||
/* Data about a particular attempt to compile a program. Note that
|
/* Data about a particular attempt to compile a program. Note that
|
||||||
* there can be many of these, each in a different GL state
|
* there can be many of these, each in a different GL state
|
||||||
* corresponding to a different brw_wm_prog_key struct, with different
|
* corresponding to a different brw_wm_prog_key struct, with different
|
||||||
@@ -872,7 +902,6 @@ struct brw_wm_prog_data {
|
|||||||
bool dispatch_16;
|
bool dispatch_16;
|
||||||
bool dispatch_32;
|
bool dispatch_32;
|
||||||
bool dual_src_blend;
|
bool dual_src_blend;
|
||||||
bool persample_dispatch;
|
|
||||||
bool uses_pos_offset;
|
bool uses_pos_offset;
|
||||||
bool uses_omask;
|
bool uses_omask;
|
||||||
bool uses_kill;
|
bool uses_kill;
|
||||||
@@ -888,10 +917,23 @@ struct brw_wm_prog_data {
|
|||||||
bool contains_flat_varying;
|
bool contains_flat_varying;
|
||||||
bool contains_noperspective_varying;
|
bool contains_noperspective_varying;
|
||||||
|
|
||||||
|
/** True if the shader wants sample shading
|
||||||
|
*
|
||||||
|
* This corresponds to whether or not a gl_SampleId, gl_SamplePosition, or
|
||||||
|
* a sample-qualified input are used in the shader. It is independent of
|
||||||
|
* GL_MIN_SAMPLE_SHADING_VALUE in GL or minSampleShading in Vulkan.
|
||||||
|
*/
|
||||||
|
bool sample_shading;
|
||||||
|
|
||||||
|
/** Should this shader be dispatched per-sample */
|
||||||
|
enum brw_sometimes persample_dispatch;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shader is ran at the coarse pixel shading dispatch rate (3DSTATE_CPS).
|
* Shader is ran at the coarse pixel shading dispatch rate (3DSTATE_CPS).
|
||||||
*/
|
*/
|
||||||
bool per_coarse_pixel_dispatch;
|
enum brw_sometimes coarse_pixel_dispatch;
|
||||||
|
|
||||||
|
unsigned msaa_flags_param;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mask of which interpolation modes are required by the fragment shader.
|
* Mask of which interpolation modes are required by the fragment shader.
|
||||||
@@ -1023,6 +1065,50 @@ _brw_wm_prog_data_reg_blocks(const struct brw_wm_prog_data *prog_data,
|
|||||||
_brw_wm_prog_data_reg_blocks(prog_data, \
|
_brw_wm_prog_data_reg_blocks(prog_data, \
|
||||||
brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx))
|
brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx))
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
brw_wm_prog_data_is_persample(const struct brw_wm_prog_data *prog_data,
|
||||||
|
enum brw_wm_msaa_flags pushed_msaa_flags)
|
||||||
|
{
|
||||||
|
if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) {
|
||||||
|
if (!(pushed_msaa_flags & BRW_WM_MSAA_FLAG_MULTISAMPLE_FBO))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (prog_data->sample_shading)
|
||||||
|
assert(pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH);
|
||||||
|
|
||||||
|
if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH)
|
||||||
|
assert(prog_data->persample_dispatch != BRW_NEVER);
|
||||||
|
else
|
||||||
|
assert(prog_data->persample_dispatch != BRW_ALWAYS);
|
||||||
|
|
||||||
|
return (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(prog_data->persample_dispatch == BRW_ALWAYS ||
|
||||||
|
prog_data->persample_dispatch == BRW_NEVER);
|
||||||
|
|
||||||
|
return prog_data->persample_dispatch;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
brw_wm_prog_data_is_coarse(const struct brw_wm_prog_data *prog_data,
|
||||||
|
enum brw_wm_msaa_flags pushed_msaa_flags)
|
||||||
|
{
|
||||||
|
if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) {
|
||||||
|
if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_COARSE_DISPATCH)
|
||||||
|
assert(prog_data->coarse_pixel_dispatch != BRW_NEVER);
|
||||||
|
else
|
||||||
|
assert(prog_data->coarse_pixel_dispatch != BRW_ALWAYS);
|
||||||
|
|
||||||
|
return pushed_msaa_flags & BRW_WM_MSAA_FLAG_COARSE_DISPATCH;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(prog_data->coarse_pixel_dispatch == BRW_ALWAYS ||
|
||||||
|
prog_data->coarse_pixel_dispatch == BRW_NEVER);
|
||||||
|
|
||||||
|
return prog_data->coarse_pixel_dispatch;
|
||||||
|
}
|
||||||
|
|
||||||
struct brw_push_const_block {
|
struct brw_push_const_block {
|
||||||
unsigned dwords; /* Dword count, not reg aligned */
|
unsigned dwords; /* Dword count, not reg aligned */
|
||||||
unsigned regs;
|
unsigned regs;
|
||||||
|
@@ -1220,7 +1220,7 @@ fs_visitor::emit_samplepos_setup()
|
|||||||
const fs_builder abld = bld.annotate("compute sample position");
|
const fs_builder abld = bld.annotate("compute sample position");
|
||||||
fs_reg pos = abld.vgrf(BRW_REGISTER_TYPE_F, 2);
|
fs_reg pos = abld.vgrf(BRW_REGISTER_TYPE_F, 2);
|
||||||
|
|
||||||
if (!wm_prog_data->persample_dispatch) {
|
if (wm_prog_data->persample_dispatch == BRW_NEVER) {
|
||||||
/* From ARB_sample_shading specification:
|
/* From ARB_sample_shading specification:
|
||||||
* "When rendering to a non-multisample buffer, or if multisample
|
* "When rendering to a non-multisample buffer, or if multisample
|
||||||
* rasterization is disabled, gl_SamplePosition will always be
|
* rasterization is disabled, gl_SamplePosition will always be
|
||||||
@@ -1255,6 +1255,16 @@ fs_visitor::emit_samplepos_setup()
|
|||||||
abld.MUL(offset(pos, abld, i), tmp_f, brw_imm_f(1 / 16.0f));
|
abld.MUL(offset(pos, abld, i), tmp_f, brw_imm_f(1 / 16.0f));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (wm_prog_data->persample_dispatch == BRW_SOMETIMES) {
|
||||||
|
check_dynamic_msaa_flag(abld, wm_prog_data,
|
||||||
|
BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH);
|
||||||
|
for (unsigned i = 0; i < 2; i++) {
|
||||||
|
set_predicate(BRW_PREDICATE_NORMAL,
|
||||||
|
bld.SEL(offset(pos, abld, i), offset(pos, abld, i),
|
||||||
|
brw_imm_f(0.5f)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1369,12 +1379,12 @@ fs_visitor::emit_samplemaskin_setup()
|
|||||||
assert(devinfo->ver >= 6);
|
assert(devinfo->ver >= 6);
|
||||||
|
|
||||||
/* The HW doesn't provide us with expected values. */
|
/* The HW doesn't provide us with expected values. */
|
||||||
assert(!wm_prog_data->per_coarse_pixel_dispatch);
|
assert(wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS);
|
||||||
|
|
||||||
fs_reg coverage_mask =
|
fs_reg coverage_mask =
|
||||||
fetch_payload_reg(bld, fs_payload().sample_mask_in_reg, BRW_REGISTER_TYPE_D);
|
fetch_payload_reg(bld, fs_payload().sample_mask_in_reg, BRW_REGISTER_TYPE_D);
|
||||||
|
|
||||||
if (!wm_prog_data->persample_dispatch)
|
if (wm_prog_data->persample_dispatch == BRW_NEVER)
|
||||||
return coverage_mask;
|
return coverage_mask;
|
||||||
|
|
||||||
/* gl_SampleMaskIn[] comes from two sources: the input coverage mask,
|
/* gl_SampleMaskIn[] comes from two sources: the input coverage mask,
|
||||||
@@ -1399,6 +1409,13 @@ fs_visitor::emit_samplemaskin_setup()
|
|||||||
fs_reg mask = bld.vgrf(BRW_REGISTER_TYPE_D);
|
fs_reg mask = bld.vgrf(BRW_REGISTER_TYPE_D);
|
||||||
abld.AND(mask, enabled_mask, coverage_mask);
|
abld.AND(mask, enabled_mask, coverage_mask);
|
||||||
|
|
||||||
|
if (wm_prog_data->persample_dispatch == BRW_ALWAYS)
|
||||||
|
return mask;
|
||||||
|
|
||||||
|
check_dynamic_msaa_flag(abld, wm_prog_data,
|
||||||
|
BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH);
|
||||||
|
set_predicate(BRW_PREDICATE_NORMAL, abld.SEL(mask, mask, coverage_mask));
|
||||||
|
|
||||||
return mask;
|
return mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1413,7 +1430,7 @@ fs_visitor::emit_shading_rate_setup()
|
|||||||
/* Coarse pixel shading size fields overlap with other fields of not in
|
/* Coarse pixel shading size fields overlap with other fields of not in
|
||||||
* coarse pixel dispatch mode, so report 0 when that's not the case.
|
* coarse pixel dispatch mode, so report 0 when that's not the case.
|
||||||
*/
|
*/
|
||||||
if (!wm_prog_data->per_coarse_pixel_dispatch)
|
if (wm_prog_data->coarse_pixel_dispatch == BRW_NEVER)
|
||||||
return brw_imm_ud(0);
|
return brw_imm_ud(0);
|
||||||
|
|
||||||
const fs_builder abld = bld.annotate("compute fragment shading rate");
|
const fs_builder abld = bld.annotate("compute fragment shading rate");
|
||||||
@@ -1439,6 +1456,13 @@ fs_visitor::emit_shading_rate_setup()
|
|||||||
fs_reg rate = abld.vgrf(BRW_REGISTER_TYPE_UD);
|
fs_reg rate = abld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
abld.OR(rate, int_rate_x, int_rate_y);
|
abld.OR(rate, int_rate_x, int_rate_y);
|
||||||
|
|
||||||
|
if (wm_prog_data->coarse_pixel_dispatch == BRW_ALWAYS)
|
||||||
|
return rate;
|
||||||
|
|
||||||
|
check_dynamic_msaa_flag(abld, wm_prog_data,
|
||||||
|
BRW_WM_MSAA_FLAG_COARSE_DISPATCH);
|
||||||
|
set_predicate(BRW_PREDICATE_NORMAL, abld.SEL(rate, rate, brw_imm_ud(0)));
|
||||||
|
|
||||||
return rate;
|
return rate;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -7256,10 +7280,14 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
|
|||||||
prog_data->computed_stencil =
|
prog_data->computed_stencil =
|
||||||
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
|
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
|
||||||
|
|
||||||
prog_data->persample_dispatch =
|
prog_data->sample_shading =
|
||||||
key->multisample_fbo &&
|
shader->info.fs.uses_sample_shading ||
|
||||||
(key->persample_interp ||
|
shader->info.outputs_read;
|
||||||
shader->info.fs.uses_sample_shading);
|
|
||||||
|
prog_data->persample_dispatch = BRW_NEVER;
|
||||||
|
if (key->multisample_fbo &&
|
||||||
|
(key->persample_interp || prog_data->sample_shading))
|
||||||
|
prog_data->persample_dispatch = BRW_ALWAYS;
|
||||||
|
|
||||||
if (devinfo->ver >= 6) {
|
if (devinfo->ver >= 6) {
|
||||||
prog_data->uses_sample_mask =
|
prog_data->uses_sample_mask =
|
||||||
@@ -7274,7 +7302,8 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
|
|||||||
* per-sample dispatch. If we need gl_SamplePosition and we don't have
|
* per-sample dispatch. If we need gl_SamplePosition and we don't have
|
||||||
* persample dispatch, we hard-code it to 0.5.
|
* persample dispatch, we hard-code it to 0.5.
|
||||||
*/
|
*/
|
||||||
prog_data->uses_pos_offset = prog_data->persample_dispatch &&
|
prog_data->uses_pos_offset =
|
||||||
|
prog_data->persample_dispatch != BRW_NEVER &&
|
||||||
(BITSET_TEST(shader->info.system_values_read,
|
(BITSET_TEST(shader->info.system_values_read,
|
||||||
SYSTEM_VALUE_SAMPLE_POS) ||
|
SYSTEM_VALUE_SAMPLE_POS) ||
|
||||||
BITSET_TEST(shader->info.system_values_read,
|
BITSET_TEST(shader->info.system_values_read,
|
||||||
@@ -7295,13 +7324,16 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
|
|||||||
|
|
||||||
/* You can't be coarse and per-sample */
|
/* You can't be coarse and per-sample */
|
||||||
assert(!key->coarse_pixel || !key->persample_interp);
|
assert(!key->coarse_pixel || !key->persample_interp);
|
||||||
prog_data->per_coarse_pixel_dispatch =
|
prog_data->coarse_pixel_dispatch =
|
||||||
key->coarse_pixel &&
|
brw_sometimes_invert(prog_data->persample_dispatch);
|
||||||
!shader->info.fs.uses_sample_shading &&
|
if (!key->coarse_pixel ||
|
||||||
!prog_data->uses_omask &&
|
prog_data->uses_omask ||
|
||||||
!prog_data->uses_sample_mask &&
|
prog_data->sample_shading ||
|
||||||
(prog_data->computed_depth_mode == BRW_PSCDEPTH_OFF) &&
|
prog_data->uses_sample_mask ||
|
||||||
!prog_data->computed_stencil;
|
(prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF) ||
|
||||||
|
prog_data->computed_stencil) {
|
||||||
|
prog_data->coarse_pixel_dispatch = BRW_NEVER;
|
||||||
|
}
|
||||||
|
|
||||||
/* We choose to always enable VMask prior to XeHP, as it would cause
|
/* We choose to always enable VMask prior to XeHP, as it would cause
|
||||||
* us to lose out on the eliminate_find_live_channel() optimization.
|
* us to lose out on the eliminate_find_live_channel() optimization.
|
||||||
@@ -7309,16 +7341,16 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
|
|||||||
prog_data->uses_vmask = devinfo->verx10 < 125 ||
|
prog_data->uses_vmask = devinfo->verx10 < 125 ||
|
||||||
shader->info.fs.needs_quad_helper_invocations ||
|
shader->info.fs.needs_quad_helper_invocations ||
|
||||||
shader->info.fs.needs_all_helper_invocations ||
|
shader->info.fs.needs_all_helper_invocations ||
|
||||||
prog_data->per_coarse_pixel_dispatch;
|
prog_data->coarse_pixel_dispatch != BRW_NEVER;
|
||||||
|
|
||||||
prog_data->uses_src_w =
|
prog_data->uses_src_w =
|
||||||
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
|
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
|
||||||
prog_data->uses_src_depth =
|
prog_data->uses_src_depth =
|
||||||
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
|
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
|
||||||
!prog_data->per_coarse_pixel_dispatch;
|
prog_data->coarse_pixel_dispatch != BRW_ALWAYS;
|
||||||
prog_data->uses_depth_w_coefficients =
|
prog_data->uses_depth_w_coefficients =
|
||||||
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
|
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
|
||||||
prog_data->per_coarse_pixel_dispatch;
|
prog_data->coarse_pixel_dispatch != BRW_NEVER;
|
||||||
|
|
||||||
calculate_urb_setup(devinfo, key, prog_data, shader, mue_map);
|
calculate_urb_setup(devinfo, key, prog_data, shader, mue_map);
|
||||||
brw_compute_flat_inputs(prog_data, shader);
|
brw_compute_flat_inputs(prog_data, shader);
|
||||||
|
@@ -721,6 +721,24 @@ namespace brw {
|
|||||||
return tmp;
|
return tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline fs_reg
|
||||||
|
dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data)
|
||||||
|
{
|
||||||
|
return fs_reg(UNIFORM, wm_prog_data->msaa_flags_param,
|
||||||
|
BRW_REGISTER_TYPE_UD);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void
|
||||||
|
check_dynamic_msaa_flag(const fs_builder &bld,
|
||||||
|
const struct brw_wm_prog_data *wm_prog_data,
|
||||||
|
enum brw_wm_msaa_flags flag)
|
||||||
|
{
|
||||||
|
fs_inst *inst = bld.AND(bld.null_reg_ud(),
|
||||||
|
dynamic_msaa_flags(wm_prog_data),
|
||||||
|
brw_imm_ud(flag));
|
||||||
|
inst->conditional_mod = BRW_CONDITIONAL_NZ;
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
|
lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
|
||||||
}
|
}
|
||||||
|
@@ -291,10 +291,10 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
|||||||
|
|
||||||
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
|
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
|
||||||
|
|
||||||
fs_reg int_pixel_offset_x, int_pixel_offset_y; /* Used on Gen12HP+ */
|
fs_reg int_sample_offset_x, int_sample_offset_y; /* Used on Gen12HP+ */
|
||||||
fs_reg int_pixel_offset_xy; /* Used on Gen8+ */
|
fs_reg int_sample_offset_xy; /* Used on Gen8+ */
|
||||||
fs_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
|
fs_reg half_int_sample_offset_x, half_int_sample_offset_y;
|
||||||
if (!wm_prog_data->per_coarse_pixel_dispatch) {
|
if (wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS) {
|
||||||
/* The thread payload only delivers subspan locations (ss0, ss1,
|
/* The thread payload only delivers subspan locations (ss0, ss1,
|
||||||
* ss2, ...). Since subspans covers 2x2 pixels blocks, we need to
|
* ss2, ...). Since subspans covers 2x2 pixels blocks, we need to
|
||||||
* generate 4 pixel coordinates out of each subspan location. We do this
|
* generate 4 pixel coordinates out of each subspan location. We do this
|
||||||
@@ -324,9 +324,9 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
|||||||
* coordinates out of 2 subspans coordinates in a single ADD instruction
|
* coordinates out of 2 subspans coordinates in a single ADD instruction
|
||||||
* (twice the operation above).
|
* (twice the operation above).
|
||||||
*/
|
*/
|
||||||
int_pixel_offset_xy = fs_reg(brw_imm_v(0x11001010));
|
int_sample_offset_xy = fs_reg(brw_imm_v(0x11001010));
|
||||||
half_int_pixel_offset_x = fs_reg(brw_imm_uw(0));
|
half_int_sample_offset_x = fs_reg(brw_imm_uw(0));
|
||||||
half_int_pixel_offset_y = fs_reg(brw_imm_uw(0));
|
half_int_sample_offset_y = fs_reg(brw_imm_uw(0));
|
||||||
/* On Gfx12.5, because of regioning restrictions, the interpolation code
|
/* On Gfx12.5, because of regioning restrictions, the interpolation code
|
||||||
* is slightly different and works off X & Y only inputs. The ordering
|
* is slightly different and works off X & Y only inputs. The ordering
|
||||||
* of the half bytes here is a bit odd, with each subspan replicated
|
* of the half bytes here is a bit odd, with each subspan replicated
|
||||||
@@ -336,9 +336,14 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
|||||||
* X offset: 0 0 1 0 0 0 1 0
|
* X offset: 0 0 1 0 0 0 1 0
|
||||||
* Y offset: 0 0 0 0 1 0 1 0
|
* Y offset: 0 0 0 0 1 0 1 0
|
||||||
*/
|
*/
|
||||||
int_pixel_offset_x = fs_reg(brw_imm_v(0x01000100));
|
int_sample_offset_x = fs_reg(brw_imm_v(0x01000100));
|
||||||
int_pixel_offset_y = fs_reg(brw_imm_v(0x01010000));
|
int_sample_offset_y = fs_reg(brw_imm_v(0x01010000));
|
||||||
} else {
|
}
|
||||||
|
|
||||||
|
fs_reg int_coarse_offset_x, int_coarse_offset_y; /* Used on Gen12HP+ */
|
||||||
|
fs_reg int_coarse_offset_xy; /* Used on Gen8+ */
|
||||||
|
fs_reg half_int_coarse_offset_x, half_int_coarse_offset_y;
|
||||||
|
if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER) {
|
||||||
/* In coarse pixel dispatch we have to do the same ADD instruction that
|
/* In coarse pixel dispatch we have to do the same ADD instruction that
|
||||||
* we do in normal per pixel dispatch, except this time we're not adding
|
* we do in normal per pixel dispatch, except this time we're not adding
|
||||||
* 1 in each direction, but instead the coarse pixel size.
|
* 1 in each direction, but instead the coarse pixel size.
|
||||||
@@ -354,34 +359,84 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
|||||||
/* To build the array of half bytes we do and AND operation with the
|
/* To build the array of half bytes we do and AND operation with the
|
||||||
* right mask in X.
|
* right mask in X.
|
||||||
*/
|
*/
|
||||||
int_pixel_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
int_coarse_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||||
dbld.AND(int_pixel_offset_x, byte_offset(r1_0, 0), brw_imm_v(0x0f000f00));
|
dbld.AND(int_coarse_offset_x, byte_offset(r1_0, 0), brw_imm_v(0x0f000f00));
|
||||||
|
|
||||||
/* And the right mask in Y. */
|
/* And the right mask in Y. */
|
||||||
int_pixel_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
int_coarse_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||||
dbld.AND(int_pixel_offset_y, byte_offset(r1_0, 1), brw_imm_v(0x0f0f0000));
|
dbld.AND(int_coarse_offset_y, byte_offset(r1_0, 1), brw_imm_v(0x0f0f0000));
|
||||||
} else {
|
} else {
|
||||||
/* To build the array of half bytes we do and AND operation with the
|
/* To build the array of half bytes we do and AND operation with the
|
||||||
* right mask in X.
|
* right mask in X.
|
||||||
*/
|
*/
|
||||||
int_pixel_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
int_coarse_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||||
dbld.AND(int_pixel_offset_x, byte_offset(r1_0, 0), brw_imm_v(0x0000f0f0));
|
dbld.AND(int_coarse_offset_x, byte_offset(r1_0, 0), brw_imm_v(0x0000f0f0));
|
||||||
|
|
||||||
/* And the right mask in Y. */
|
/* And the right mask in Y. */
|
||||||
int_pixel_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
int_coarse_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||||
dbld.AND(int_pixel_offset_y, byte_offset(r1_0, 1), brw_imm_v(0xff000000));
|
dbld.AND(int_coarse_offset_y, byte_offset(r1_0, 1), brw_imm_v(0xff000000));
|
||||||
|
|
||||||
/* Finally OR the 2 registers. */
|
/* Finally OR the 2 registers. */
|
||||||
int_pixel_offset_xy = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
int_coarse_offset_xy = dbld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||||
dbld.OR(int_pixel_offset_xy, int_pixel_offset_x, int_pixel_offset_y);
|
dbld.OR(int_coarse_offset_xy, int_coarse_offset_x, int_coarse_offset_y);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Also compute the half pixel size used to center pixels. */
|
/* Also compute the half coarse pixel size used to center coarse pixels. */
|
||||||
half_int_pixel_offset_x = bld.vgrf(BRW_REGISTER_TYPE_UW);
|
half_int_coarse_offset_x = bld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||||
half_int_pixel_offset_y = bld.vgrf(BRW_REGISTER_TYPE_UW);
|
half_int_coarse_offset_y = bld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||||
|
|
||||||
bld.SHR(half_int_pixel_offset_x, suboffset(r1_0, 0), brw_imm_ud(1));
|
bld.SHR(half_int_coarse_offset_x, suboffset(r1_0, 0), brw_imm_ud(1));
|
||||||
bld.SHR(half_int_pixel_offset_y, suboffset(r1_0, 1), brw_imm_ud(1));
|
bld.SHR(half_int_coarse_offset_y, suboffset(r1_0, 1), brw_imm_ud(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
fs_reg int_pixel_offset_x, int_pixel_offset_y; /* Used on Gen12HP+ */
|
||||||
|
fs_reg int_pixel_offset_xy; /* Used on Gen8+ */
|
||||||
|
fs_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
|
||||||
|
switch (wm_prog_data->coarse_pixel_dispatch) {
|
||||||
|
case BRW_NEVER:
|
||||||
|
#define COPY_OFFSET_REG(prefix, suffix) \
|
||||||
|
prefix##_pixel_##suffix = prefix##_sample_##suffix;
|
||||||
|
|
||||||
|
COPY_OFFSET_REG(int, offset_x)
|
||||||
|
COPY_OFFSET_REG(int, offset_y)
|
||||||
|
COPY_OFFSET_REG(int, offset_xy)
|
||||||
|
COPY_OFFSET_REG(half_int, offset_x)
|
||||||
|
COPY_OFFSET_REG(half_int, offset_y)
|
||||||
|
|
||||||
|
#undef COPY_OFFSET_REG
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BRW_SOMETIMES:
|
||||||
|
check_dynamic_msaa_flag(bld, wm_prog_data,
|
||||||
|
BRW_WM_MSAA_FLAG_COARSE_DISPATCH);
|
||||||
|
|
||||||
|
#define COPY_OFFSET_REG(prefix, suffix) \
|
||||||
|
prefix##_pixel_##suffix = bld.vgrf(BRW_REGISTER_TYPE_UW); \
|
||||||
|
bld.SEL(prefix##_pixel_##suffix, \
|
||||||
|
prefix##_coarse_##suffix, \
|
||||||
|
prefix##_pixel_##suffix); \
|
||||||
|
|
||||||
|
COPY_OFFSET_REG(int, offset_x)
|
||||||
|
COPY_OFFSET_REG(int, offset_y)
|
||||||
|
COPY_OFFSET_REG(int, offset_xy)
|
||||||
|
COPY_OFFSET_REG(half_int, offset_x)
|
||||||
|
COPY_OFFSET_REG(half_int, offset_y)
|
||||||
|
|
||||||
|
#undef COPY_OFFSET_REG
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BRW_ALWAYS:
|
||||||
|
#define COPY_OFFSET_REG(prefix, suffix) \
|
||||||
|
prefix##_pixel_##suffix = prefix##_coarse_##suffix;
|
||||||
|
|
||||||
|
COPY_OFFSET_REG(int, offset_x)
|
||||||
|
COPY_OFFSET_REG(int, offset_y)
|
||||||
|
COPY_OFFSET_REG(int, offset_xy)
|
||||||
|
COPY_OFFSET_REG(half_int, offset_x)
|
||||||
|
COPY_OFFSET_REG(half_int, offset_y)
|
||||||
|
|
||||||
|
#undef COPY_OFFSET_REG
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i < DIV_ROUND_UP(dispatch_width, 16); i++) {
|
for (unsigned i = 0; i < DIV_ROUND_UP(dispatch_width, 16); i++) {
|
||||||
@@ -401,11 +456,15 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
|||||||
fs_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)),
|
fs_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)),
|
||||||
int_pixel_offset_y);
|
int_pixel_offset_y);
|
||||||
|
|
||||||
if (wm_prog_data->per_coarse_pixel_dispatch) {
|
if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER) {
|
||||||
dbld.ADD(int_pixel_x, int_pixel_x,
|
fs_inst *addx = dbld.ADD(int_pixel_x, int_pixel_x,
|
||||||
horiz_stride(half_int_pixel_offset_x, 0));
|
horiz_stride(half_int_pixel_offset_x, 0));
|
||||||
dbld.ADD(int_pixel_y, int_pixel_y,
|
fs_inst *addy = dbld.ADD(int_pixel_y, int_pixel_y,
|
||||||
horiz_stride(half_int_pixel_offset_y, 0));
|
horiz_stride(half_int_pixel_offset_y, 0));
|
||||||
|
if (wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS) {
|
||||||
|
addx->predicate = BRW_PREDICATE_NORMAL;
|
||||||
|
addy->predicate = BRW_PREDICATE_NORMAL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
hbld.MOV(offset(pixel_x, hbld, i), horiz_stride(int_pixel_x, 2));
|
hbld.MOV(offset(pixel_x, hbld, i), horiz_stride(int_pixel_x, 2));
|
||||||
@@ -463,8 +522,8 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
|||||||
}
|
}
|
||||||
|
|
||||||
abld = bld.annotate("compute pos.z");
|
abld = bld.annotate("compute pos.z");
|
||||||
|
fs_reg coarse_z;
|
||||||
if (wm_prog_data->uses_depth_w_coefficients) {
|
if (wm_prog_data->uses_depth_w_coefficients) {
|
||||||
assert(!wm_prog_data->uses_src_depth);
|
|
||||||
/* In coarse pixel mode, the HW doesn't interpolate Z coordinate
|
/* In coarse pixel mode, the HW doesn't interpolate Z coordinate
|
||||||
* properly. In the same way we have to add the coarse pixel size to
|
* properly. In the same way we have to add the coarse pixel size to
|
||||||
* pixels locations, here we recompute the Z value with 2 coefficients
|
* pixels locations, here we recompute the Z value with 2 coefficients
|
||||||
@@ -501,9 +560,9 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
|||||||
abld.MAD(float_pixel_x, float_pixel_x, brw_imm_f(0.5f), f_cps_width);
|
abld.MAD(float_pixel_x, float_pixel_x, brw_imm_f(0.5f), f_cps_width);
|
||||||
abld.MAD(float_pixel_y, float_pixel_y, brw_imm_f(0.5f), f_cps_height);
|
abld.MAD(float_pixel_y, float_pixel_y, brw_imm_f(0.5f), f_cps_height);
|
||||||
|
|
||||||
this->pixel_z = abld.vgrf(BRW_REGISTER_TYPE_F);
|
coarse_z = abld.vgrf(BRW_REGISTER_TYPE_F);
|
||||||
abld.MAD(this->pixel_z, z_c0, z_cx, float_pixel_x);
|
abld.MAD(coarse_z, z_c0, z_cx, float_pixel_x);
|
||||||
abld.MAD(this->pixel_z, this->pixel_z, z_cy, float_pixel_y);
|
abld.MAD(coarse_z, coarse_z, z_cy, float_pixel_y);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (wm_prog_data->uses_src_depth) {
|
if (wm_prog_data->uses_src_depth) {
|
||||||
@@ -511,6 +570,34 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
|||||||
this->pixel_z = fetch_payload_reg(bld, fs_payload().source_depth_reg);
|
this->pixel_z = fetch_payload_reg(bld, fs_payload().source_depth_reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (wm_prog_data->uses_depth_w_coefficients ||
|
||||||
|
wm_prog_data->uses_src_depth) {
|
||||||
|
fs_reg sample_z = this->pixel_z;
|
||||||
|
|
||||||
|
switch (wm_prog_data->coarse_pixel_dispatch) {
|
||||||
|
case BRW_NEVER:
|
||||||
|
assert(wm_prog_data->uses_src_depth);
|
||||||
|
assert(!wm_prog_data->uses_depth_w_coefficients);
|
||||||
|
this->pixel_z = sample_z;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BRW_SOMETIMES:
|
||||||
|
assert(wm_prog_data->uses_src_depth);
|
||||||
|
assert(wm_prog_data->uses_depth_w_coefficients);
|
||||||
|
this->pixel_z = abld.vgrf(BRW_REGISTER_TYPE_F);
|
||||||
|
|
||||||
|
/* We re-use the check_dynamic_msaa_flag() call from above */
|
||||||
|
abld.SEL(this->pixel_z, coarse_z, sample_z);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BRW_ALWAYS:
|
||||||
|
assert(!wm_prog_data->uses_src_depth);
|
||||||
|
assert(wm_prog_data->uses_depth_w_coefficients);
|
||||||
|
this->pixel_z = coarse_z;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (wm_prog_data->uses_src_w) {
|
if (wm_prog_data->uses_src_w) {
|
||||||
abld = bld.annotate("compute pos.w");
|
abld = bld.annotate("compute pos.w");
|
||||||
this->pixel_w = fetch_payload_reg(abld, fs_payload().source_w_reg);
|
this->pixel_w = fetch_payload_reg(abld, fs_payload().source_w_reg);
|
||||||
|
@@ -359,7 +359,18 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
|||||||
inst->desc =
|
inst->desc =
|
||||||
(inst->group / 16) << 11 | /* rt slot group */
|
(inst->group / 16) << 11 | /* rt slot group */
|
||||||
brw_fb_write_desc(devinfo, inst->target, msg_ctl, inst->last_rt,
|
brw_fb_write_desc(devinfo, inst->target, msg_ctl, inst->last_rt,
|
||||||
prog_data->per_coarse_pixel_dispatch);
|
0 /* coarse_write */);
|
||||||
|
|
||||||
|
fs_reg desc = brw_imm_ud(0);
|
||||||
|
if (prog_data->coarse_pixel_dispatch == BRW_ALWAYS) {
|
||||||
|
inst->desc |= (1 << 18);
|
||||||
|
} else if (prog_data->coarse_pixel_dispatch == BRW_SOMETIMES) {
|
||||||
|
STATIC_ASSERT(BRW_WM_MSAA_FLAG_COARSE_DISPATCH == (1 << 18));
|
||||||
|
const fs_builder &ubld = bld.exec_all().group(8, 0);
|
||||||
|
desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
ubld.AND(desc, dynamic_msaa_flags(prog_data),
|
||||||
|
brw_imm_ud(BRW_WM_MSAA_FLAG_COARSE_DISPATCH));
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t ex_desc = 0;
|
uint32_t ex_desc = 0;
|
||||||
if (devinfo->ver >= 11) {
|
if (devinfo->ver >= 11) {
|
||||||
@@ -376,7 +387,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
|||||||
inst->opcode = SHADER_OPCODE_SEND;
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
inst->resize_sources(3);
|
inst->resize_sources(3);
|
||||||
inst->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE;
|
inst->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE;
|
||||||
inst->src[0] = brw_imm_ud(0);
|
inst->src[0] = desc;
|
||||||
inst->src[1] = brw_imm_ud(0);
|
inst->src[1] = brw_imm_ud(0);
|
||||||
inst->src[2] = payload;
|
inst->src[2] = payload;
|
||||||
inst->mlen = regs_written(load);
|
inst->mlen = regs_written(load);
|
||||||
@@ -2456,8 +2467,6 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
|
|||||||
fs_reg payload = brw_vec8_grf(0, 0);
|
fs_reg payload = brw_vec8_grf(0, 0);
|
||||||
unsigned mlen = 1;
|
unsigned mlen = 1;
|
||||||
|
|
||||||
const fs_reg desc = inst->src[1];
|
|
||||||
|
|
||||||
unsigned mode;
|
unsigned mode;
|
||||||
switch (inst->opcode) {
|
switch (inst->opcode) {
|
||||||
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
||||||
@@ -2480,11 +2489,33 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
|
|||||||
unreachable("Invalid interpolator instruction");
|
unreachable("Invalid interpolator instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fs_reg desc = inst->src[1];
|
||||||
uint32_t desc_imm =
|
uint32_t desc_imm =
|
||||||
brw_pixel_interp_desc(devinfo, mode, inst->pi_noperspective,
|
brw_pixel_interp_desc(devinfo, mode, inst->pi_noperspective,
|
||||||
wm_prog_data->per_coarse_pixel_dispatch,
|
false /* coarse_pixel_rate */,
|
||||||
inst->exec_size, inst->group);
|
inst->exec_size, inst->group);
|
||||||
|
|
||||||
|
if (wm_prog_data->coarse_pixel_dispatch == BRW_ALWAYS) {
|
||||||
|
desc_imm |= (1 << 15);
|
||||||
|
} else if (wm_prog_data->coarse_pixel_dispatch == BRW_SOMETIMES) {
|
||||||
|
fs_reg orig_desc = desc;
|
||||||
|
const fs_builder &ubld = bld.exec_all().group(8, 0);
|
||||||
|
desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
ubld.AND(desc, dynamic_msaa_flags(wm_prog_data),
|
||||||
|
brw_imm_ud(BRW_WM_MSAA_FLAG_COARSE_DISPATCH));
|
||||||
|
|
||||||
|
/* The uniform is in bit 18 but we need it in bit 15 */
|
||||||
|
STATIC_ASSERT(BRW_WM_MSAA_FLAG_COARSE_DISPATCH == (1 << 18));
|
||||||
|
ubld.SHR(desc, desc, brw_imm_ud(3));
|
||||||
|
|
||||||
|
/* And, if it's AT_OFFSET, we might have a non-trivial descriptor */
|
||||||
|
if (orig_desc.file == IMM) {
|
||||||
|
desc_imm |= orig_desc.ud;
|
||||||
|
} else {
|
||||||
|
ubld.OR(desc, desc, orig_desc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
assert(bld.shader->devinfo->ver >= 7);
|
assert(bld.shader->devinfo->ver >= 7);
|
||||||
inst->opcode = SHADER_OPCODE_SEND;
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
inst->sfid = GFX7_SFID_PIXEL_INTERPOLATOR;
|
inst->sfid = GFX7_SFID_PIXEL_INTERPOLATOR;
|
||||||
|
@@ -1526,7 +1526,8 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
|
|||||||
|
|
||||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
|
||||||
intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
|
intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
|
||||||
ms != NULL ? ms->rasterization_samples : 1);
|
ms != NULL ? ms->rasterization_samples : 1,
|
||||||
|
0 /* msaa_flags */);
|
||||||
|
|
||||||
ps.KernelStartPointer0 = fs_bin->kernel.offset +
|
ps.KernelStartPointer0 = fs_bin->kernel.offset +
|
||||||
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
|
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
|
||||||
@@ -1581,7 +1582,8 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
|
|||||||
ps.PixelShaderValid = true;
|
ps.PixelShaderValid = true;
|
||||||
ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
|
ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
|
||||||
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
|
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
|
||||||
ps.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
|
ps.PixelShaderIsPerSample =
|
||||||
|
brw_wm_prog_data_is_persample(wm_prog_data, 0);
|
||||||
ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
|
ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
|
||||||
ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
|
ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
|
||||||
ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
|
ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
|
||||||
@@ -1603,7 +1605,7 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
|
|||||||
assert(!wm_prog_data->inner_coverage); /* Not available in SPIR-V */
|
assert(!wm_prog_data->inner_coverage); /* Not available in SPIR-V */
|
||||||
if (!wm_prog_data->uses_sample_mask)
|
if (!wm_prog_data->uses_sample_mask)
|
||||||
ps.InputCoverageMaskState = ICMS_NONE;
|
ps.InputCoverageMaskState = ICMS_NONE;
|
||||||
else if (wm_prog_data->per_coarse_pixel_dispatch)
|
else if (brw_wm_prog_data_is_coarse(wm_prog_data, 0))
|
||||||
ps.InputCoverageMaskState = ICMS_NORMAL;
|
ps.InputCoverageMaskState = ICMS_NORMAL;
|
||||||
else if (wm_prog_data->post_depth_coverage)
|
else if (wm_prog_data->post_depth_coverage)
|
||||||
ps.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
|
ps.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
|
||||||
@@ -1613,13 +1615,14 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
|
|||||||
#if GFX_VER >= 11
|
#if GFX_VER >= 11
|
||||||
ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
|
ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
|
||||||
wm_prog_data->uses_depth_w_coefficients;
|
wm_prog_data->uses_depth_w_coefficients;
|
||||||
ps.PixelShaderIsPerCoarsePixel = wm_prog_data->per_coarse_pixel_dispatch;
|
ps.PixelShaderIsPerCoarsePixel = wm_prog_data->coarse_pixel_dispatch;
|
||||||
#endif
|
#endif
|
||||||
#if GFX_VERx10 >= 125
|
#if GFX_VERx10 >= 125
|
||||||
/* TODO: We should only require this when the last geometry shader uses
|
/* TODO: We should only require this when the last geometry shader uses
|
||||||
* a fragment shading rate that is not constant.
|
* a fragment shading rate that is not constant.
|
||||||
*/
|
*/
|
||||||
ps.EnablePSDependencyOnCPsizeChange = wm_prog_data->per_coarse_pixel_dispatch;
|
ps.EnablePSDependencyOnCPsizeChange =
|
||||||
|
brw_wm_prog_data_is_coarse(wm_prog_data, 0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -315,7 +315,8 @@ genX(emit_shading_rate)(struct anv_batch *batch,
|
|||||||
const struct vk_fragment_shading_rate_state *fsr)
|
const struct vk_fragment_shading_rate_state *fsr)
|
||||||
{
|
{
|
||||||
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
||||||
const bool cps_enable = wm_prog_data && wm_prog_data->per_coarse_pixel_dispatch;
|
const bool cps_enable = wm_prog_data &&
|
||||||
|
brw_wm_prog_data_is_coarse(wm_prog_data, 0);
|
||||||
|
|
||||||
#if GFX_VER == 11
|
#if GFX_VER == 11
|
||||||
anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {
|
anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {
|
||||||
|
@@ -1605,7 +1605,7 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
|
|||||||
wm.PixelShaderKillsPixel;
|
wm.PixelShaderKillsPixel;
|
||||||
|
|
||||||
if (ms != NULL && ms->rasterization_samples > 1) {
|
if (ms != NULL && ms->rasterization_samples > 1) {
|
||||||
if (wm_prog_data->persample_dispatch) {
|
if (brw_wm_prog_data_is_persample(wm_prog_data, 0)) {
|
||||||
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
|
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
|
||||||
} else {
|
} else {
|
||||||
wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
|
wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
|
||||||
@@ -1671,7 +1671,8 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
|
|||||||
|
|
||||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
|
||||||
intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
|
intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
|
||||||
ms != NULL ? ms->rasterization_samples : 1);
|
ms != NULL ? ms->rasterization_samples : 1,
|
||||||
|
0 /* msaa_flags */);
|
||||||
|
|
||||||
ps.KernelStartPointer0 = fs_bin->kernel.offset +
|
ps.KernelStartPointer0 = fs_bin->kernel.offset +
|
||||||
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
|
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
|
||||||
@@ -1739,7 +1740,8 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
|
|||||||
ps.PixelShaderValid = true;
|
ps.PixelShaderValid = true;
|
||||||
ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
|
ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
|
||||||
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
|
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
|
||||||
ps.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
|
ps.PixelShaderIsPerSample =
|
||||||
|
brw_wm_prog_data_is_persample(wm_prog_data, 0);
|
||||||
ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
|
ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
|
||||||
ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
|
ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
|
||||||
ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
|
ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
|
||||||
|
Reference in New Issue
Block a user