intel/compiler: Use FS thread payload only for FS
Move the setup into the FS thread payload constructor. Consolidate the FS payload setup in the brw_fs_thread_payload.cpp file. Reviewed-by: Francisco Jerez <currojerez@riseup.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18176>
This commit is contained in:
@@ -1299,7 +1299,7 @@ fs_visitor::emit_samplepos_setup()
|
||||
* the positions using vstride=16, width=8, hstride=2.
|
||||
*/
|
||||
const fs_reg sample_pos_reg =
|
||||
fetch_payload_reg(abld, payload.sample_pos_reg, BRW_REGISTER_TYPE_W);
|
||||
fetch_payload_reg(abld, fs_payload().sample_pos_reg, BRW_REGISTER_TYPE_W);
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
fs_reg tmp_d = bld.vgrf(BRW_REGISTER_TYPE_D);
|
||||
@@ -1430,7 +1430,7 @@ fs_visitor::emit_samplemaskin_setup()
|
||||
assert(!wm_prog_data->per_coarse_pixel_dispatch);
|
||||
|
||||
fs_reg coverage_mask =
|
||||
fetch_payload_reg(bld, payload.sample_mask_in_reg, BRW_REGISTER_TYPE_D);
|
||||
fetch_payload_reg(bld, fs_payload().sample_mask_in_reg, BRW_REGISTER_TYPE_D);
|
||||
|
||||
if (wm_prog_data->persample_dispatch) {
|
||||
/* gl_SampleMaskIn[] comes from two sources: the input coverage mask,
|
||||
@@ -1616,7 +1616,7 @@ fs_visitor::assign_curb_setup()
|
||||
fs_reg(), /* payload2 */
|
||||
};
|
||||
|
||||
fs_reg dest = retype(brw_vec8_grf(payload.num_regs + i, 0),
|
||||
fs_reg dest = retype(brw_vec8_grf(payload().num_regs + i, 0),
|
||||
BRW_REGISTER_TYPE_UD);
|
||||
fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, dest, srcs, 4);
|
||||
|
||||
@@ -1667,7 +1667,7 @@ fs_visitor::assign_curb_setup()
|
||||
assert(constant_nr / 8 < 64);
|
||||
used |= BITFIELD64_BIT(constant_nr / 8);
|
||||
|
||||
struct brw_reg brw_reg = brw_vec1_grf(payload.num_regs +
|
||||
struct brw_reg brw_reg = brw_vec1_grf(payload().num_regs +
|
||||
constant_nr / 8,
|
||||
constant_nr % 8);
|
||||
brw_reg.abs = inst->src[i].abs;
|
||||
@@ -1688,7 +1688,7 @@ fs_visitor::assign_curb_setup()
|
||||
|
||||
/* push_reg_mask_param is in 32-bit units */
|
||||
unsigned mask_param = stage_prog_data->push_reg_mask_param;
|
||||
struct brw_reg mask = brw_vec1_grf(payload.num_regs + mask_param / 8,
|
||||
struct brw_reg mask = brw_vec1_grf(payload().num_regs + mask_param / 8,
|
||||
mask_param % 8);
|
||||
|
||||
fs_reg b32;
|
||||
@@ -1708,7 +1708,7 @@ fs_visitor::assign_curb_setup()
|
||||
if (want_zero & BITFIELD64_BIT(i)) {
|
||||
assert(i < prog_data->curb_read_length);
|
||||
struct brw_reg push_reg =
|
||||
retype(brw_vec8_grf(payload.num_regs + i, 0),
|
||||
retype(brw_vec8_grf(payload().num_regs + i, 0),
|
||||
BRW_REGISTER_TYPE_D);
|
||||
|
||||
ubld.AND(push_reg, push_reg, component(b32, i % 16));
|
||||
@@ -1719,7 +1719,7 @@ fs_visitor::assign_curb_setup()
|
||||
}
|
||||
|
||||
/* This may be updated in assign_urb_setup or assign_vs_urb_setup. */
|
||||
this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
|
||||
this->first_non_payload_grf = payload().num_regs + prog_data->curb_read_length;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1956,7 +1956,7 @@ fs_visitor::assign_urb_setup()
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
|
||||
|
||||
int urb_start = payload.num_regs + prog_data->base.curb_read_length;
|
||||
int urb_start = payload().num_regs + prog_data->base.curb_read_length;
|
||||
|
||||
/* Offset all the urb_setup[] index by the actual position of the
|
||||
* setup regs, now that the location of the constants has been chosen.
|
||||
@@ -2000,7 +2000,7 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
|
||||
{
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == ATTR) {
|
||||
int grf = payload.num_regs +
|
||||
int grf = payload().num_regs +
|
||||
prog_data->curb_read_length +
|
||||
inst->src[i].nr +
|
||||
inst->src[i].offset / REG_SIZE;
|
||||
@@ -5852,78 +5852,11 @@ fs_visitor::dump_instruction(const backend_instruction *be_inst, FILE *file) con
|
||||
fprintf(file, "\n");
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::setup_fs_payload_gfx6()
|
||||
{
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
|
||||
const unsigned payload_width = MIN2(16, dispatch_width);
|
||||
assert(dispatch_width % payload_width == 0);
|
||||
assert(devinfo->ver >= 6);
|
||||
|
||||
/* R0: PS thread payload header. */
|
||||
payload.num_regs++;
|
||||
|
||||
for (unsigned j = 0; j < dispatch_width / payload_width; j++) {
|
||||
/* R1: masks, pixel X/Y coordinates. */
|
||||
payload.subspan_coord_reg[j] = payload.num_regs++;
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < dispatch_width / payload_width; j++) {
|
||||
/* R3-26: barycentric interpolation coordinates. These appear in the
|
||||
* same order that they appear in the brw_barycentric_mode enum. Each
|
||||
* set of coordinates occupies 2 registers if dispatch width == 8 and 4
|
||||
* registers if dispatch width == 16. Coordinates only appear if they
|
||||
* were enabled using the "Barycentric Interpolation Mode" bits in
|
||||
* WM_STATE.
|
||||
*/
|
||||
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
|
||||
if (prog_data->barycentric_interp_modes & (1 << i)) {
|
||||
payload.barycentric_coord_reg[i][j] = payload.num_regs;
|
||||
payload.num_regs += payload_width / 4;
|
||||
}
|
||||
}
|
||||
|
||||
/* R27-28: interpolated depth if uses source depth */
|
||||
if (prog_data->uses_src_depth) {
|
||||
payload.source_depth_reg[j] = payload.num_regs;
|
||||
payload.num_regs += payload_width / 8;
|
||||
}
|
||||
|
||||
/* R29-30: interpolated W set if GFX6_WM_USES_SOURCE_W. */
|
||||
if (prog_data->uses_src_w) {
|
||||
payload.source_w_reg[j] = payload.num_regs;
|
||||
payload.num_regs += payload_width / 8;
|
||||
}
|
||||
|
||||
/* R31: MSAA position offsets. */
|
||||
if (prog_data->uses_pos_offset) {
|
||||
payload.sample_pos_reg[j] = payload.num_regs;
|
||||
payload.num_regs++;
|
||||
}
|
||||
|
||||
/* R32-33: MSAA input coverage mask */
|
||||
if (prog_data->uses_sample_mask) {
|
||||
assert(devinfo->ver >= 7);
|
||||
payload.sample_mask_in_reg[j] = payload.num_regs;
|
||||
payload.num_regs += payload_width / 8;
|
||||
}
|
||||
|
||||
/* R66: Source Depth and/or W Attribute Vertex Deltas */
|
||||
if (prog_data->uses_depth_w_coefficients) {
|
||||
payload.depth_w_coef_reg[j] = payload.num_regs;
|
||||
payload.num_regs++;
|
||||
}
|
||||
}
|
||||
|
||||
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
|
||||
source_depth_to_render_target = true;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::setup_vs_payload()
|
||||
{
|
||||
thread_payload &payload = this->payload();
|
||||
|
||||
/* R0: thread header, R1: urb handles */
|
||||
payload.num_regs = 2;
|
||||
}
|
||||
@@ -5932,6 +5865,7 @@ void
|
||||
fs_visitor::setup_gs_payload()
|
||||
{
|
||||
assert(stage == MESA_SHADER_GEOMETRY);
|
||||
thread_payload &payload = this->payload();
|
||||
|
||||
struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
|
||||
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
|
||||
@@ -5974,6 +5908,8 @@ fs_visitor::setup_gs_payload()
|
||||
void
|
||||
fs_visitor::setup_cs_payload()
|
||||
{
|
||||
thread_payload &payload = this->payload();
|
||||
|
||||
assert(devinfo->ver >= 7);
|
||||
/* TODO: Fill out uses_btd_stack_ids automatically */
|
||||
payload.num_regs = 1 + brw_cs_prog_data(prog_data)->uses_btd_stack_ids;
|
||||
@@ -6687,6 +6623,7 @@ bool
|
||||
fs_visitor::run_tcs()
|
||||
{
|
||||
assert(stage == MESA_SHADER_TESS_CTRL);
|
||||
thread_payload &payload = this->payload();
|
||||
|
||||
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
|
||||
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
|
||||
@@ -6761,7 +6698,7 @@ fs_visitor::run_tes()
|
||||
assert(stage == MESA_SHADER_TESS_EVAL);
|
||||
|
||||
/* R0: thread header, R1-3: gl_TessCoord.xyz, R4: URB handles */
|
||||
payload.num_regs = 5;
|
||||
payload().num_regs = 5;
|
||||
|
||||
emit_nir_code();
|
||||
|
||||
@@ -6863,10 +6800,8 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
||||
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
|
||||
if (devinfo->ver >= 6)
|
||||
setup_fs_payload_gfx6();
|
||||
else
|
||||
setup_fs_payload_gfx4();
|
||||
payload_ = new fs_thread_payload(*this, source_depth_to_render_target,
|
||||
runtime_check_aads_emit);
|
||||
|
||||
if (0) {
|
||||
emit_dummy_fs();
|
||||
@@ -6971,7 +6906,7 @@ fs_visitor::run_bs(bool allow_spilling)
|
||||
assert(stage >= MESA_SHADER_RAYGEN && stage <= MESA_SHADER_CALLABLE);
|
||||
|
||||
/* R0: thread header, R1: stack IDs, R2: argument addresses */
|
||||
payload.num_regs = 3;
|
||||
payload().num_regs = 3;
|
||||
|
||||
emit_nir_code();
|
||||
|
||||
@@ -7017,7 +6952,7 @@ fs_visitor::run_task(bool allow_spilling)
|
||||
* Inline parameter is optional but always present since we use it to pass
|
||||
* the address to descriptors.
|
||||
*/
|
||||
payload.num_regs = dispatch_width == 32 ? 4 : 3;
|
||||
payload().num_regs = dispatch_width == 32 ? 4 : 3;
|
||||
|
||||
emit_nir_code();
|
||||
|
||||
@@ -7064,7 +6999,7 @@ fs_visitor::run_mesh(bool allow_spilling)
|
||||
* Inline parameter is optional but always present since we use it to pass
|
||||
* the address to descriptors.
|
||||
*/
|
||||
payload.num_regs = dispatch_width == 32 ? 4 : 3;
|
||||
payload().num_regs = dispatch_width == 32 ? 4 : 3;
|
||||
|
||||
emit_nir_code();
|
||||
|
||||
@@ -7437,7 +7372,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
||||
return NULL;
|
||||
} else if (!INTEL_DEBUG(DEBUG_NO8)) {
|
||||
simd8_cfg = v8->cfg;
|
||||
prog_data->base.dispatch_grf_start_reg = v8->payload.num_regs;
|
||||
prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs;
|
||||
prog_data->reg_blocks_8 = brw_register_blocks(v8->grf_used);
|
||||
const performance &perf = v8->performance_analysis.require();
|
||||
throughput = MAX2(throughput, perf.throughput);
|
||||
@@ -7481,7 +7416,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
||||
v16->fail_msg);
|
||||
} else {
|
||||
simd16_cfg = v16->cfg;
|
||||
prog_data->dispatch_grf_start_reg_16 = v16->payload.num_regs;
|
||||
prog_data->dispatch_grf_start_reg_16 = v16->payload().num_regs;
|
||||
prog_data->reg_blocks_16 = brw_register_blocks(v16->grf_used);
|
||||
const performance &perf = v16->performance_analysis.require();
|
||||
throughput = MAX2(throughput, perf.throughput);
|
||||
@@ -7514,7 +7449,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
||||
"SIMD32 shader inefficient\n");
|
||||
} else {
|
||||
simd32_cfg = v32->cfg;
|
||||
prog_data->dispatch_grf_start_reg_32 = v32->payload.num_regs;
|
||||
prog_data->dispatch_grf_start_reg_32 = v32->payload().num_regs;
|
||||
prog_data->reg_blocks_32 = brw_register_blocks(v32->grf_used);
|
||||
throughput = MAX2(throughput, perf.throughput);
|
||||
}
|
||||
|
@@ -89,9 +89,15 @@ struct shader_stats {
|
||||
/**
 * Stage-independent description of the thread payload.
 *
 * Stage-specific payload descriptions derive from this struct and are
 * destroyed through a pointer to it, hence the virtual destructor.
 */
struct thread_payload {
   /**
    * The number of thread payload registers the hardware will supply.
    *
    * Starts at zero so that a default-initialized payload (including the base
    * subobject of a derived payload) never exposes an indeterminate count
    * before the stage setup code has filled it in.
    */
   uint8_t num_regs = 0;

   virtual ~thread_payload() = default;
};
|
||||
|
||||
struct fs_thread_payload : public thread_payload {
|
||||
fs_thread_payload(const fs_visitor &v,
|
||||
bool &source_depth_to_render_target,
|
||||
bool &runtime_check_aads_emit);
|
||||
|
||||
uint8_t subspan_coord_reg[2];
|
||||
uint8_t source_depth_reg[2];
|
||||
uint8_t source_w_reg[2];
|
||||
@@ -150,8 +156,6 @@ public:
|
||||
bool run_mesh(bool allow_spilling);
|
||||
void optimize();
|
||||
void allocate_registers(bool allow_spilling);
|
||||
void setup_fs_payload_gfx4();
|
||||
void setup_fs_payload_gfx6();
|
||||
void setup_vs_payload();
|
||||
void setup_gs_payload();
|
||||
void setup_cs_payload();
|
||||
@@ -411,7 +415,16 @@ public:
|
||||
bool failed;
|
||||
char *fail_msg;
|
||||
|
||||
fs_thread_payload payload;
|
||||
thread_payload *payload_;
|
||||
|
||||
/** Stage-independent view of the payload object this visitor owns. */
thread_payload &payload()
{
   thread_payload *const current = this->payload_;
   return *current;
}
|
||||
|
||||
/**
 * Fragment-shader view of the payload object this visitor owns.
 *
 * The downcast is unchecked at runtime apart from the stage assertion, so
 * this must only be called on a visitor whose stage is MESA_SHADER_FRAGMENT.
 */
fs_thread_payload &fs_payload() {
   assert(stage == MESA_SHADER_FRAGMENT);
   return *static_cast<fs_thread_payload *>(this->payload_);
}
|
||||
|
||||
bool source_depth_to_render_target;
|
||||
bool runtime_check_aads_emit;
|
||||
|
264
src/intel/compiler/brw_fs_thread_payload.cpp
Normal file
264
src/intel/compiler/brw_fs_thread_payload.cpp
Normal file
@@ -0,0 +1,264 @@
|
||||
/*
|
||||
* Copyright © 2006-2022 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_fs.h"
|
||||
|
||||
using namespace brw;
|
||||
|
||||
static inline void
|
||||
setup_fs_payload_gfx6(fs_thread_payload &payload,
|
||||
const fs_visitor &v,
|
||||
bool &source_depth_to_render_target)
|
||||
{
|
||||
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(v.prog_data);
|
||||
|
||||
const unsigned payload_width = MIN2(16, v.dispatch_width);
|
||||
assert(v.dispatch_width % payload_width == 0);
|
||||
assert(v.devinfo->ver >= 6);
|
||||
|
||||
payload.num_regs = 0;
|
||||
|
||||
/* R0: PS thread payload header. */
|
||||
payload.num_regs++;
|
||||
|
||||
for (unsigned j = 0; j < v.dispatch_width / payload_width; j++) {
|
||||
/* R1: masks, pixel X/Y coordinates. */
|
||||
payload.subspan_coord_reg[j] = payload.num_regs++;
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < v.dispatch_width / payload_width; j++) {
|
||||
/* R3-26: barycentric interpolation coordinates. These appear in the
|
||||
* same order that they appear in the brw_barycentric_mode enum. Each
|
||||
* set of coordinates occupies 2 registers if dispatch width == 8 and 4
|
||||
* registers if dispatch width == 16. Coordinates only appear if they
|
||||
* were enabled using the "Barycentric Interpolation Mode" bits in
|
||||
* WM_STATE.
|
||||
*/
|
||||
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
|
||||
if (prog_data->barycentric_interp_modes & (1 << i)) {
|
||||
payload.barycentric_coord_reg[i][j] = payload.num_regs;
|
||||
payload.num_regs += payload_width / 4;
|
||||
}
|
||||
}
|
||||
|
||||
/* R27-28: interpolated depth if uses source depth */
|
||||
if (prog_data->uses_src_depth) {
|
||||
payload.source_depth_reg[j] = payload.num_regs;
|
||||
payload.num_regs += payload_width / 8;
|
||||
}
|
||||
|
||||
/* R29-30: interpolated W set if GFX6_WM_USES_SOURCE_W. */
|
||||
if (prog_data->uses_src_w) {
|
||||
payload.source_w_reg[j] = payload.num_regs;
|
||||
payload.num_regs += payload_width / 8;
|
||||
}
|
||||
|
||||
/* R31: MSAA position offsets. */
|
||||
if (prog_data->uses_pos_offset) {
|
||||
payload.sample_pos_reg[j] = payload.num_regs;
|
||||
payload.num_regs++;
|
||||
}
|
||||
|
||||
/* R32-33: MSAA input coverage mask */
|
||||
if (prog_data->uses_sample_mask) {
|
||||
assert(v.devinfo->ver >= 7);
|
||||
payload.sample_mask_in_reg[j] = payload.num_regs;
|
||||
payload.num_regs += payload_width / 8;
|
||||
}
|
||||
|
||||
/* R66: Source Depth and/or W Attribute Vertex Deltas */
|
||||
if (prog_data->uses_depth_w_coefficients) {
|
||||
payload.depth_w_coef_reg[j] = payload.num_regs;
|
||||
payload.num_regs++;
|
||||
}
|
||||
}
|
||||
|
||||
if (v.nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
|
||||
source_depth_to_render_target = true;
|
||||
}
|
||||
}
|
||||
|
||||
#undef P /* promoted depth */
|
||||
#undef C /* computed */
|
||||
#undef N /* non-promoted? */
|
||||
|
||||
#define P 0
|
||||
#define C 1
|
||||
#define N 2
|
||||
|
||||
static const struct {
|
||||
GLuint mode:2;
|
||||
GLuint sd_present:1;
|
||||
GLuint sd_to_rt:1;
|
||||
GLuint dd_present:1;
|
||||
GLuint ds_present:1;
|
||||
} wm_iz_table[BRW_WM_IZ_BIT_MAX] =
|
||||
{
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ N, 1, 1, 0, 0 },
|
||||
{ N, 0, 1, 0, 0 },
|
||||
{ N, 0, 1, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ N, 1, 1, 0, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ N, 1, 1, 0, 0 },
|
||||
{ N, 0, 1, 0, 0 },
|
||||
{ N, 0, 1, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ N, 1, 1, 0, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ N, 1, 1, 0, 1 },
|
||||
{ N, 0, 1, 0, 1 },
|
||||
{ N, 0, 1, 0, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ N, 1, 1, 0, 1 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 0, 0, 0, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 0, 1, 0, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 1, 1, 0, 1 },
|
||||
{ C, 0, 1, 0, 1 },
|
||||
{ C, 0, 1, 0, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 1, 1, 1, 1 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 1, 1, 1, 1 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ C, 0, 1, 1, 1 }
|
||||
};
|
||||
|
||||
/**
|
||||
* \param line_aa BRW_WM_AA_NEVER, BRW_WM_AA_ALWAYS or BRW_WM_AA_SOMETIMES
|
||||
* \param lookup bitmask of BRW_WM_IZ_* flags
|
||||
*/
|
||||
static inline void
|
||||
setup_fs_payload_gfx4(fs_thread_payload &payload,
|
||||
const fs_visitor &v,
|
||||
bool &source_depth_to_render_target,
|
||||
bool &runtime_check_aads_emit)
|
||||
{
|
||||
assert(v.dispatch_width <= 16);
|
||||
|
||||
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(v.prog_data);
|
||||
brw_wm_prog_key *key = (brw_wm_prog_key *) v.key;
|
||||
|
||||
GLuint reg = 1;
|
||||
bool kill_stats_promoted_workaround = false;
|
||||
int lookup = key->iz_lookup;
|
||||
|
||||
assert(lookup < BRW_WM_IZ_BIT_MAX);
|
||||
|
||||
/* Crazy workaround in the windowizer, which we need to track in
|
||||
* our register allocation and render target writes. See the "If
|
||||
* statistics are enabled..." paragraph of 11.5.3.2: Early Depth
|
||||
* Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec.
|
||||
*/
|
||||
if (key->stats_wm &&
|
||||
(lookup & BRW_WM_IZ_PS_KILL_ALPHATEST_BIT) &&
|
||||
wm_iz_table[lookup].mode == P) {
|
||||
kill_stats_promoted_workaround = true;
|
||||
}
|
||||
|
||||
payload.subspan_coord_reg[0] = reg++;
|
||||
|
||||
if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth ||
|
||||
kill_stats_promoted_workaround) {
|
||||
payload.source_depth_reg[0] = reg;
|
||||
reg += 2;
|
||||
}
|
||||
|
||||
if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround)
|
||||
source_depth_to_render_target = true;
|
||||
|
||||
if (wm_iz_table[lookup].ds_present || key->line_aa != BRW_WM_AA_NEVER) {
|
||||
payload.aa_dest_stencil_reg[0] = reg;
|
||||
runtime_check_aads_emit =
|
||||
!wm_iz_table[lookup].ds_present && key->line_aa == BRW_WM_AA_SOMETIMES;
|
||||
reg++;
|
||||
}
|
||||
|
||||
if (wm_iz_table[lookup].dd_present) {
|
||||
payload.dest_depth_reg[0] = reg;
|
||||
reg+=2;
|
||||
}
|
||||
|
||||
payload.num_regs = reg;
|
||||
}
|
||||
|
||||
#undef P /* promoted depth */
|
||||
#undef C /* computed */
|
||||
#undef N /* non-promoted? */
|
||||
|
||||
/**
 * Build the fragment shader thread payload description for visitor \p v.
 *
 * Every register-index array is value-initialized first; the hardware
 * generation reported by v.devinfo then selects the layout routine, which
 * also assigns num_regs.  The two bool references are outputs handed to that
 * routine (only the pre-Gfx6 path uses runtime_check_aads_emit).
 */
fs_thread_payload::fs_thread_payload(const fs_visitor &v,
                                     bool &source_depth_to_render_target,
                                     bool &runtime_check_aads_emit)
  : subspan_coord_reg(),
    source_depth_reg(),
    source_w_reg(),
    aa_dest_stencil_reg(),
    dest_depth_reg(),
    sample_pos_reg(),
    sample_mask_in_reg(),
    depth_w_coef_reg(),
    barycentric_coord_reg(),
    local_invocation_id_reg()
{
   const bool pre_gfx6 = v.devinfo->ver < 6;

   if (pre_gfx6) {
      setup_fs_payload_gfx4(*this, v, source_depth_to_render_target,
                            runtime_check_aads_emit);
   } else {
      setup_fs_payload_gfx6(*this, v, source_depth_to_render_target);
   }
}
|
@@ -194,7 +194,7 @@ fs_visitor::emit_interpolation_setup_gfx4()
|
||||
abld.ADD(offset(delta_xy, abld, 1), this->pixel_y, ystart);
|
||||
}
|
||||
|
||||
this->pixel_z = fetch_payload_reg(bld, payload.source_depth_reg);
|
||||
this->pixel_z = fetch_payload_reg(bld, fs_payload().source_depth_reg);
|
||||
|
||||
/* The SF program automatically handles doing the perspective correction or
|
||||
* not based on wm_prog_data::interp_mode[] so we can use the same pixel
|
||||
@@ -469,7 +469,7 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
||||
* pixels locations, here we recompute the Z value with 2 coefficients
|
||||
* in X & Y axis.
|
||||
*/
|
||||
fs_reg coef_payload = fetch_payload_reg(abld, payload.depth_w_coef_reg, BRW_REGISTER_TYPE_F);
|
||||
fs_reg coef_payload = fetch_payload_reg(abld, fs_payload().depth_w_coef_reg, BRW_REGISTER_TYPE_F);
|
||||
const fs_reg x_start = brw_vec1_grf(coef_payload.nr, 2);
|
||||
const fs_reg y_start = brw_vec1_grf(coef_payload.nr, 6);
|
||||
const fs_reg z_cx = brw_vec1_grf(coef_payload.nr, 1);
|
||||
@@ -507,19 +507,19 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
||||
|
||||
if (wm_prog_data->uses_src_depth) {
|
||||
assert(!wm_prog_data->uses_depth_w_coefficients);
|
||||
this->pixel_z = fetch_payload_reg(bld, payload.source_depth_reg);
|
||||
this->pixel_z = fetch_payload_reg(bld, fs_payload().source_depth_reg);
|
||||
}
|
||||
|
||||
if (wm_prog_data->uses_src_w) {
|
||||
abld = bld.annotate("compute pos.w");
|
||||
this->pixel_w = fetch_payload_reg(abld, payload.source_w_reg);
|
||||
this->pixel_w = fetch_payload_reg(abld, fs_payload().source_w_reg);
|
||||
this->wpos_w = vgrf(glsl_type::float_type);
|
||||
abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
|
||||
}
|
||||
|
||||
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
|
||||
this->delta_xy[i] = fetch_barycentric_reg(
|
||||
bld, payload.barycentric_coord_reg[i]);
|
||||
bld, fs_payload().barycentric_coord_reg[i]);
|
||||
}
|
||||
|
||||
uint32_t centroid_modes = wm_prog_data->barycentric_interp_modes &
|
||||
@@ -622,7 +622,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
|
||||
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
|
||||
|
||||
/* Hand over gl_FragDepth or the payload depth. */
|
||||
const fs_reg dst_depth = fetch_payload_reg(bld, payload.dest_depth_reg);
|
||||
const fs_reg dst_depth = fetch_payload_reg(bld, fs_payload().dest_depth_reg);
|
||||
fs_reg src_depth, src_stencil;
|
||||
|
||||
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
|
||||
@@ -636,7 +636,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
|
||||
* explicitly the pass-through case.
|
||||
*/
|
||||
assert(devinfo->ver <= 5);
|
||||
src_depth = fetch_payload_reg(bld, payload.source_depth_reg);
|
||||
src_depth = fetch_payload_reg(bld, fs_payload().source_depth_reg);
|
||||
}
|
||||
|
||||
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
|
||||
@@ -1214,7 +1214,6 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
||||
init();
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
fs_visitor::init()
|
||||
{
|
||||
@@ -1233,7 +1232,7 @@ fs_visitor::init()
|
||||
this->nir_ssa_values = NULL;
|
||||
this->nir_system_values = NULL;
|
||||
|
||||
memset(&this->payload, 0, sizeof(this->payload));
|
||||
this->payload_ = new thread_payload();
|
||||
this->source_depth_to_render_target = false;
|
||||
this->runtime_check_aads_emit = false;
|
||||
this->first_non_payload_grf = 0;
|
||||
@@ -1254,4 +1253,5 @@ fs_visitor::init()
|
||||
|
||||
/** Release the heap-allocated payload description owned by this visitor. */
fs_visitor::~fs_visitor()
{
   delete payload_;
}
|
||||
|
@@ -2681,7 +2681,7 @@ fs_visitor::lower_logical_sends()
|
||||
lower_fb_write_logical_send(ibld, inst,
|
||||
brw_wm_prog_data(prog_data),
|
||||
(const brw_wm_prog_key *)key,
|
||||
payload);
|
||||
fs_payload());
|
||||
break;
|
||||
|
||||
case FS_OPCODE_FB_READ_LOGICAL:
|
||||
|
@@ -1211,9 +1211,9 @@ fs_visitor::nir_emit_task_mesh_intrinsic(const fs_builder &bld,
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_mesh_inline_data_intel:
|
||||
assert(payload.num_regs == 3 || payload.num_regs == 4);
|
||||
assert(payload().num_regs == 3 || payload().num_regs == 4);
|
||||
/* Inline Parameter is the last element of the payload. */
|
||||
bld.MOV(dest, retype(brw_vec1_grf(payload.num_regs - 1,
|
||||
bld.MOV(dest, retype(brw_vec1_grf(payload().num_regs - 1,
|
||||
nir_intrinsic_align_offset(instr)),
|
||||
dest.type));
|
||||
break;
|
||||
|
@@ -1420,7 +1420,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
|
||||
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs;
|
||||
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
|
||||
|
||||
fs_generator g(compiler, params->log_data, mem_ctx,
|
||||
|
@@ -2642,7 +2642,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
|
||||
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs;
|
||||
|
||||
fs_generator g(compiler, params->log_data, mem_ctx,
|
||||
&prog_data->base.base, v.runtime_check_aads_emit,
|
||||
|
@@ -823,7 +823,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
|
||||
debug_enabled);
|
||||
if (v.run_gs()) {
|
||||
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
|
||||
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
|
||||
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs;
|
||||
|
||||
fs_generator g(compiler, params->log_data, mem_ctx,
|
||||
&prog_data->base.base, false, MESA_SHADER_GEOMETRY);
|
||||
|
@@ -453,7 +453,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
|
||||
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs;
|
||||
|
||||
fs_generator g(compiler, params->log_data, mem_ctx,
|
||||
&prog_data->base.base, false, MESA_SHADER_TESS_CTRL);
|
||||
|
@@ -1,169 +0,0 @@
|
||||
/*
|
||||
Copyright (C) Intel Corp. 2006. All Rights Reserved.
|
||||
Intel funded Tungsten Graphics to
|
||||
develop this 3D driver.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice (including the
|
||||
next paragraph) shall be included in all copies or substantial
|
||||
portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
**********************************************************************/
|
||||
/*
|
||||
* Authors:
|
||||
* Keith Whitwell <keithw@vmware.com>
|
||||
*/
|
||||
|
||||
|
||||
#include "brw_fs.h"
|
||||
|
||||
|
||||
#undef P /* prompted depth */
|
||||
#undef C /* computed */
|
||||
#undef N /* non-promoted? */
|
||||
|
||||
#define P 0
|
||||
#define C 1
|
||||
#define N 2
|
||||
|
||||
static const struct {
|
||||
GLuint mode:2;
|
||||
GLuint sd_present:1;
|
||||
GLuint sd_to_rt:1;
|
||||
GLuint dd_present:1;
|
||||
GLuint ds_present:1;
|
||||
} wm_iz_table[BRW_WM_IZ_BIT_MAX] =
|
||||
{
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ N, 1, 1, 0, 0 },
|
||||
{ N, 0, 1, 0, 0 },
|
||||
{ N, 0, 1, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ N, 1, 1, 0, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ N, 1, 1, 0, 0 },
|
||||
{ N, 0, 1, 0, 0 },
|
||||
{ N, 0, 1, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ N, 1, 1, 0, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ C, 0, 1, 1, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ N, 1, 1, 0, 1 },
|
||||
{ N, 0, 1, 0, 1 },
|
||||
{ N, 0, 1, 0, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ N, 1, 1, 0, 1 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 0, 0, 0, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 0, 1, 0, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 1, 1, 0, 1 },
|
||||
{ C, 0, 1, 0, 1 },
|
||||
{ C, 0, 1, 0, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 1, 1, 1, 1 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ P, 0, 0, 0, 0 },
|
||||
{ C, 1, 1, 1, 1 },
|
||||
{ C, 0, 1, 1, 1 },
|
||||
{ C, 0, 1, 1, 1 }
|
||||
};
|
||||
|
||||
/**
|
||||
* \param line_aa BRW_WM_AA_NEVER, BRW_WM_AA_ALWAYS or BRW_WM_AA_SOMETIMES
|
||||
* \param lookup bitmask of BRW_WM_IZ_* flags
|
||||
*/
|
||||
void fs_visitor::setup_fs_payload_gfx4()
|
||||
{
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
assert(dispatch_width <= 16);
|
||||
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
|
||||
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
|
||||
GLuint reg = 1;
|
||||
bool kill_stats_promoted_workaround = false;
|
||||
int lookup = key->iz_lookup;
|
||||
|
||||
assert(lookup < BRW_WM_IZ_BIT_MAX);
|
||||
|
||||
/* Crazy workaround in the windowizer, which we need to track in
|
||||
* our register allocation and render target writes. See the "If
|
||||
* statistics are enabled..." paragraph of 11.5.3.2: Early Depth
|
||||
* Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec.
|
||||
*/
|
||||
if (key->stats_wm &&
|
||||
(lookup & BRW_WM_IZ_PS_KILL_ALPHATEST_BIT) &&
|
||||
wm_iz_table[lookup].mode == P) {
|
||||
kill_stats_promoted_workaround = true;
|
||||
}
|
||||
|
||||
payload.subspan_coord_reg[0] = reg++;
|
||||
|
||||
if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth ||
|
||||
kill_stats_promoted_workaround) {
|
||||
payload.source_depth_reg[0] = reg;
|
||||
reg += 2;
|
||||
}
|
||||
|
||||
if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround)
|
||||
source_depth_to_render_target = true;
|
||||
|
||||
if (wm_iz_table[lookup].ds_present || key->line_aa != BRW_WM_AA_NEVER) {
|
||||
payload.aa_dest_stencil_reg[0] = reg;
|
||||
runtime_check_aads_emit =
|
||||
!wm_iz_table[lookup].ds_present && key->line_aa == BRW_WM_AA_SOMETIMES;
|
||||
reg++;
|
||||
}
|
||||
|
||||
if (wm_iz_table[lookup].dd_present) {
|
||||
payload.dest_depth_reg[0] = reg;
|
||||
reg+=2;
|
||||
}
|
||||
|
||||
payload.num_regs = reg;
|
||||
}
|
@@ -65,6 +65,7 @@ libintel_compiler_files = files(
|
||||
'brw_fs_saturate_propagation.cpp',
|
||||
'brw_fs_scoreboard.cpp',
|
||||
'brw_fs_sel_peephole.cpp',
|
||||
'brw_fs_thread_payload.cpp',
|
||||
'brw_fs_validate.cpp',
|
||||
'brw_fs_visitor.cpp',
|
||||
'brw_inst.h',
|
||||
@@ -139,7 +140,6 @@ libintel_compiler_files = files(
|
||||
'brw_vec4_vs_visitor.cpp',
|
||||
'brw_vec4_vs.h',
|
||||
'brw_vue_map.c',
|
||||
'brw_wm_iz.cpp',
|
||||
'gfx6_gs_visitor.cpp',
|
||||
'gfx6_gs_visitor.h',
|
||||
)
|
||||
|
Reference in New Issue
Block a user