intel/compiler: Use FS thread payload only for FS

Move the setup into the FS thread payload constructor.  Consolidate
the fragment shader payload setup code in the brw_fs_thread_payload.cpp file.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18176>
This commit is contained in:
Caio Oliveira
2022-08-19 12:40:20 -07:00
committed by Marge Bot
parent dab66d20a7
commit 73920b7e2f
12 changed files with 322 additions and 279 deletions

View File

@@ -1299,7 +1299,7 @@ fs_visitor::emit_samplepos_setup()
* the positions using vstride=16, width=8, hstride=2.
*/
const fs_reg sample_pos_reg =
fetch_payload_reg(abld, payload.sample_pos_reg, BRW_REGISTER_TYPE_W);
fetch_payload_reg(abld, fs_payload().sample_pos_reg, BRW_REGISTER_TYPE_W);
for (unsigned i = 0; i < 2; i++) {
fs_reg tmp_d = bld.vgrf(BRW_REGISTER_TYPE_D);
@@ -1430,7 +1430,7 @@ fs_visitor::emit_samplemaskin_setup()
assert(!wm_prog_data->per_coarse_pixel_dispatch);
fs_reg coverage_mask =
fetch_payload_reg(bld, payload.sample_mask_in_reg, BRW_REGISTER_TYPE_D);
fetch_payload_reg(bld, fs_payload().sample_mask_in_reg, BRW_REGISTER_TYPE_D);
if (wm_prog_data->persample_dispatch) {
/* gl_SampleMaskIn[] comes from two sources: the input coverage mask,
@@ -1616,7 +1616,7 @@ fs_visitor::assign_curb_setup()
fs_reg(), /* payload2 */
};
fs_reg dest = retype(brw_vec8_grf(payload.num_regs + i, 0),
fs_reg dest = retype(brw_vec8_grf(payload().num_regs + i, 0),
BRW_REGISTER_TYPE_UD);
fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, dest, srcs, 4);
@@ -1667,7 +1667,7 @@ fs_visitor::assign_curb_setup()
assert(constant_nr / 8 < 64);
used |= BITFIELD64_BIT(constant_nr / 8);
struct brw_reg brw_reg = brw_vec1_grf(payload.num_regs +
struct brw_reg brw_reg = brw_vec1_grf(payload().num_regs +
constant_nr / 8,
constant_nr % 8);
brw_reg.abs = inst->src[i].abs;
@@ -1688,7 +1688,7 @@ fs_visitor::assign_curb_setup()
/* push_reg_mask_param is in 32-bit units */
unsigned mask_param = stage_prog_data->push_reg_mask_param;
struct brw_reg mask = brw_vec1_grf(payload.num_regs + mask_param / 8,
struct brw_reg mask = brw_vec1_grf(payload().num_regs + mask_param / 8,
mask_param % 8);
fs_reg b32;
@@ -1708,7 +1708,7 @@ fs_visitor::assign_curb_setup()
if (want_zero & BITFIELD64_BIT(i)) {
assert(i < prog_data->curb_read_length);
struct brw_reg push_reg =
retype(brw_vec8_grf(payload.num_regs + i, 0),
retype(brw_vec8_grf(payload().num_regs + i, 0),
BRW_REGISTER_TYPE_D);
ubld.AND(push_reg, push_reg, component(b32, i % 16));
@@ -1719,7 +1719,7 @@ fs_visitor::assign_curb_setup()
}
/* This may be updated in assign_urb_setup or assign_vs_urb_setup. */
this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
this->first_non_payload_grf = payload().num_regs + prog_data->curb_read_length;
}
/*
@@ -1956,7 +1956,7 @@ fs_visitor::assign_urb_setup()
assert(stage == MESA_SHADER_FRAGMENT);
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
int urb_start = payload.num_regs + prog_data->base.curb_read_length;
int urb_start = payload().num_regs + prog_data->base.curb_read_length;
/* Offset all the urb_setup[] index by the actual position of the
* setup regs, now that the location of the constants has been chosen.
@@ -2000,7 +2000,7 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
{
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == ATTR) {
int grf = payload.num_regs +
int grf = payload().num_regs +
prog_data->curb_read_length +
inst->src[i].nr +
inst->src[i].offset / REG_SIZE;
@@ -5852,78 +5852,11 @@ fs_visitor::dump_instruction(const backend_instruction *be_inst, FILE *file) con
fprintf(file, "\n");
}
void
fs_visitor::setup_fs_payload_gfx6()
{
assert(stage == MESA_SHADER_FRAGMENT);
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
const unsigned payload_width = MIN2(16, dispatch_width);
assert(dispatch_width % payload_width == 0);
assert(devinfo->ver >= 6);
/* R0: PS thread payload header. */
payload.num_regs++;
for (unsigned j = 0; j < dispatch_width / payload_width; j++) {
/* R1: masks, pixel X/Y coordinates. */
payload.subspan_coord_reg[j] = payload.num_regs++;
}
for (unsigned j = 0; j < dispatch_width / payload_width; j++) {
/* R3-26: barycentric interpolation coordinates. These appear in the
* same order that they appear in the brw_barycentric_mode enum. Each
* set of coordinates occupies 2 registers if dispatch width == 8 and 4
* registers if dispatch width == 16. Coordinates only appear if they
* were enabled using the "Barycentric Interpolation Mode" bits in
* WM_STATE.
*/
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
if (prog_data->barycentric_interp_modes & (1 << i)) {
payload.barycentric_coord_reg[i][j] = payload.num_regs;
payload.num_regs += payload_width / 4;
}
}
/* R27-28: interpolated depth if uses source depth */
if (prog_data->uses_src_depth) {
payload.source_depth_reg[j] = payload.num_regs;
payload.num_regs += payload_width / 8;
}
/* R29-30: interpolated W set if GFX6_WM_USES_SOURCE_W. */
if (prog_data->uses_src_w) {
payload.source_w_reg[j] = payload.num_regs;
payload.num_regs += payload_width / 8;
}
/* R31: MSAA position offsets. */
if (prog_data->uses_pos_offset) {
payload.sample_pos_reg[j] = payload.num_regs;
payload.num_regs++;
}
/* R32-33: MSAA input coverage mask */
if (prog_data->uses_sample_mask) {
assert(devinfo->ver >= 7);
payload.sample_mask_in_reg[j] = payload.num_regs;
payload.num_regs += payload_width / 8;
}
/* R66: Source Depth and/or W Attribute Vertex Deltas */
if (prog_data->uses_depth_w_coefficients) {
payload.depth_w_coef_reg[j] = payload.num_regs;
payload.num_regs++;
}
}
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
source_depth_to_render_target = true;
}
}
void
fs_visitor::setup_vs_payload()
{
thread_payload &payload = this->payload();
/* R0: thread header, R1: urb handles */
payload.num_regs = 2;
}
@@ -5932,6 +5865,7 @@ void
fs_visitor::setup_gs_payload()
{
assert(stage == MESA_SHADER_GEOMETRY);
thread_payload &payload = this->payload();
struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
@@ -5974,6 +5908,8 @@ fs_visitor::setup_gs_payload()
void
fs_visitor::setup_cs_payload()
{
thread_payload &payload = this->payload();
assert(devinfo->ver >= 7);
/* TODO: Fill out uses_btd_stack_ids automatically */
payload.num_regs = 1 + brw_cs_prog_data(prog_data)->uses_btd_stack_ids;
@@ -6687,6 +6623,7 @@ bool
fs_visitor::run_tcs()
{
assert(stage == MESA_SHADER_TESS_CTRL);
thread_payload &payload = this->payload();
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
@@ -6761,7 +6698,7 @@ fs_visitor::run_tes()
assert(stage == MESA_SHADER_TESS_EVAL);
/* R0: thread header, R1-3: gl_TessCoord.xyz, R4: URB handles */
payload.num_regs = 5;
payload().num_regs = 5;
emit_nir_code();
@@ -6863,10 +6800,8 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
assert(stage == MESA_SHADER_FRAGMENT);
if (devinfo->ver >= 6)
setup_fs_payload_gfx6();
else
setup_fs_payload_gfx4();
payload_ = new fs_thread_payload(*this, source_depth_to_render_target,
runtime_check_aads_emit);
if (0) {
emit_dummy_fs();
@@ -6971,7 +6906,7 @@ fs_visitor::run_bs(bool allow_spilling)
assert(stage >= MESA_SHADER_RAYGEN && stage <= MESA_SHADER_CALLABLE);
/* R0: thread header, R1: stack IDs, R2: argument addresses */
payload.num_regs = 3;
payload().num_regs = 3;
emit_nir_code();
@@ -7017,7 +6952,7 @@ fs_visitor::run_task(bool allow_spilling)
* Inline parameter is optional but always present since we use it to pass
* the address to descriptors.
*/
payload.num_regs = dispatch_width == 32 ? 4 : 3;
payload().num_regs = dispatch_width == 32 ? 4 : 3;
emit_nir_code();
@@ -7064,7 +6999,7 @@ fs_visitor::run_mesh(bool allow_spilling)
* Inline parameter is optional but always present since we use it to pass
* the address to descriptors.
*/
payload.num_regs = dispatch_width == 32 ? 4 : 3;
payload().num_regs = dispatch_width == 32 ? 4 : 3;
emit_nir_code();
@@ -7437,7 +7372,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
return NULL;
} else if (!INTEL_DEBUG(DEBUG_NO8)) {
simd8_cfg = v8->cfg;
prog_data->base.dispatch_grf_start_reg = v8->payload.num_regs;
prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs;
prog_data->reg_blocks_8 = brw_register_blocks(v8->grf_used);
const performance &perf = v8->performance_analysis.require();
throughput = MAX2(throughput, perf.throughput);
@@ -7481,7 +7416,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
v16->fail_msg);
} else {
simd16_cfg = v16->cfg;
prog_data->dispatch_grf_start_reg_16 = v16->payload.num_regs;
prog_data->dispatch_grf_start_reg_16 = v16->payload().num_regs;
prog_data->reg_blocks_16 = brw_register_blocks(v16->grf_used);
const performance &perf = v16->performance_analysis.require();
throughput = MAX2(throughput, perf.throughput);
@@ -7514,7 +7449,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
"SIMD32 shader inefficient\n");
} else {
simd32_cfg = v32->cfg;
prog_data->dispatch_grf_start_reg_32 = v32->payload.num_regs;
prog_data->dispatch_grf_start_reg_32 = v32->payload().num_regs;
prog_data->reg_blocks_32 = brw_register_blocks(v32->grf_used);
throughput = MAX2(throughput, perf.throughput);
}

View File

@@ -89,9 +89,15 @@ struct shader_stats {
struct thread_payload {
/** The number of thread payload registers the hardware will supply. */
uint8_t num_regs;
virtual ~thread_payload() = default;
};
struct fs_thread_payload : public thread_payload {
fs_thread_payload(const fs_visitor &v,
bool &source_depth_to_render_target,
bool &runtime_check_aads_emit);
uint8_t subspan_coord_reg[2];
uint8_t source_depth_reg[2];
uint8_t source_w_reg[2];
@@ -150,8 +156,6 @@ public:
bool run_mesh(bool allow_spilling);
void optimize();
void allocate_registers(bool allow_spilling);
void setup_fs_payload_gfx4();
void setup_fs_payload_gfx6();
void setup_vs_payload();
void setup_gs_payload();
void setup_cs_payload();
@@ -411,7 +415,16 @@ public:
bool failed;
char *fail_msg;
fs_thread_payload payload;
thread_payload *payload_;
thread_payload &payload() {
return *this->payload_;
}
fs_thread_payload &fs_payload() {
assert(stage == MESA_SHADER_FRAGMENT);
return *static_cast<fs_thread_payload *>(this->payload_);
};
bool source_depth_to_render_target;
bool runtime_check_aads_emit;

View File

@@ -0,0 +1,264 @@
/*
* Copyright © 2006-2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_fs.h"
using namespace brw;
/* Lay out the Gfx6+ fragment shader thread payload: the PS header, the
 * per-subspan mask/coordinate registers, then (once per SIMD16 half)
 * barycentrics, source depth/W, sample positions, coverage mask and
 * depth/W coefficients.  Also sets source_depth_to_render_target when the
 * shader writes gl_FragDepth.
 */
static inline void
setup_fs_payload_gfx6(fs_thread_payload &payload,
                      const fs_visitor &v,
                      bool &source_depth_to_render_target)
{
   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(v.prog_data);

   /* Wider dispatches repeat the per-subspan payload sections, so a SIMD32
    * shader gets dispatch_width / payload_width copies of each.
    */
   const unsigned payload_width = MIN2(16, v.dispatch_width);
   assert(v.dispatch_width % payload_width == 0);
   assert(v.devinfo->ver >= 6);

   payload.num_regs = 0;

   /* R0: PS thread payload header. */
   payload.num_regs++;

   for (unsigned j = 0; j < v.dispatch_width / payload_width; j++) {
      /* R1: masks, pixel X/Y coordinates. */
      payload.subspan_coord_reg[j] = payload.num_regs++;
   }

   for (unsigned j = 0; j < v.dispatch_width / payload_width; j++) {
      /* R3-26: barycentric interpolation coordinates.  These appear in the
       * same order that they appear in the brw_barycentric_mode enum.  Each
       * set of coordinates occupies 2 registers if dispatch width == 8 and 4
       * registers if dispatch width == 16.  Coordinates only appear if they
       * were enabled using the "Barycentric Interpolation Mode" bits in
       * WM_STATE.
       */
      for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
         if (prog_data->barycentric_interp_modes & (1 << i)) {
            payload.barycentric_coord_reg[i][j] = payload.num_regs;
            payload.num_regs += payload_width / 4;
         }
      }

      /* R27-28: interpolated depth if uses source depth */
      if (prog_data->uses_src_depth) {
         payload.source_depth_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R29-30: interpolated W set if GFX6_WM_USES_SOURCE_W. */
      if (prog_data->uses_src_w) {
         payload.source_w_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R31: MSAA position offsets. */
      if (prog_data->uses_pos_offset) {
         payload.sample_pos_reg[j] = payload.num_regs;
         payload.num_regs++;
      }

      /* R32-33: MSAA input coverage mask */
      if (prog_data->uses_sample_mask) {
         /* The coverage-mask register only exists on Gfx7+. */
         assert(v.devinfo->ver >= 7);
         payload.sample_mask_in_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R66: Source Depth and/or W Attribute Vertex Deltas */
      if (prog_data->uses_depth_w_coefficients) {
         payload.depth_w_coef_reg[j] = payload.num_regs;
         payload.num_regs++;
      }
   }

   /* Writing gl_FragDepth means the source depth must be handed through to
    * the render-target write.
    */
   if (v.nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
      source_depth_to_render_target = true;
   }
}
#undef P /* prompted depth */
#undef C /* computed */
#undef N /* non-promoted? */
#define P 0
#define C 1
#define N 2

/* Pre-Gfx6 windower early-depth ("IZ") state table, indexed by a bitmask of
 * BRW_WM_IZ_* flags (key->iz_lookup; see the BRW_WM_IZ_BIT_MAX bound).
 * Each entry records which depth/stencil registers the hardware delivers in
 * the payload (sd/dd/ds present), whether source depth must be passed
 * through to the render target (sd_to_rt), and the depth mode (P/C/N above).
 * Consumed only by setup_fs_payload_gfx4(); entry order is part of the
 * lookup contract and must not change.
 */
static const struct {
   GLuint mode:2;
   GLuint sd_present:1;
   GLuint sd_to_rt:1;
   GLuint dd_present:1;
   GLuint ds_present:1;
} wm_iz_table[BRW_WM_IZ_BIT_MAX] =
{
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { N, 1, 1, 0, 0 },
 { N, 0, 1, 0, 0 },
 { N, 0, 1, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { C, 0, 1, 1, 0 },
 { C, 0, 1, 1, 0 },
 { P, 0, 0, 0, 0 },
 { N, 1, 1, 0, 0 },
 { C, 0, 1, 1, 0 },
 { C, 0, 1, 1, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { N, 1, 1, 0, 0 },
 { N, 0, 1, 0, 0 },
 { N, 0, 1, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { C, 0, 1, 1, 0 },
 { C, 0, 1, 1, 0 },
 { P, 0, 0, 0, 0 },
 { N, 1, 1, 0, 0 },
 { C, 0, 1, 1, 0 },
 { C, 0, 1, 1, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { N, 1, 1, 0, 1 },
 { N, 0, 1, 0, 1 },
 { N, 0, 1, 0, 1 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { C, 0, 1, 1, 1 },
 { C, 0, 1, 1, 1 },
 { P, 0, 0, 0, 0 },
 { N, 1, 1, 0, 1 },
 { C, 0, 1, 1, 1 },
 { C, 0, 1, 1, 1 },
 { P, 0, 0, 0, 0 },
 { C, 0, 0, 0, 1 },
 { P, 0, 0, 0, 0 },
 { C, 0, 1, 0, 1 },
 { P, 0, 0, 0, 0 },
 { C, 1, 1, 0, 1 },
 { C, 0, 1, 0, 1 },
 { C, 0, 1, 0, 1 },
 { P, 0, 0, 0, 0 },
 { C, 1, 1, 1, 1 },
 { C, 0, 1, 1, 1 },
 { C, 0, 1, 1, 1 },
 { P, 0, 0, 0, 0 },
 { C, 1, 1, 1, 1 },
 { C, 0, 1, 1, 1 },
 { C, 0, 1, 1, 1 }
};
/**
 * Lay out the pre-Gfx6 fragment shader thread payload.
 *
 * Which depth/stencil registers the hardware supplies depends on the
 * windower IZ state, looked up in wm_iz_table via key->iz_lookup (a
 * bitmask of BRW_WM_IZ_* flags) and key->line_aa (BRW_WM_AA_NEVER,
 * BRW_WM_AA_ALWAYS or BRW_WM_AA_SOMETIMES), both from the WM program key.
 * May also enable the source_depth_to_render_target and
 * runtime_check_aads_emit out-flags.
 */
static inline void
setup_fs_payload_gfx4(fs_thread_payload &payload,
                      const fs_visitor &v,
                      bool &source_depth_to_render_target,
                      bool &runtime_check_aads_emit)
{
   /* Pre-Gfx6 hardware has no SIMD32 fragment dispatch. */
   assert(v.dispatch_width <= 16);

   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(v.prog_data);
   brw_wm_prog_key *key = (brw_wm_prog_key *) v.key;

   GLuint reg = 1;
   bool kill_stats_promoted_workaround = false;
   int lookup = key->iz_lookup;
   assert(lookup < BRW_WM_IZ_BIT_MAX);

   /* Crazy workaround in the windowizer, which we need to track in
    * our register allocation and render target writes.  See the "If
    * statistics are enabled..." paragraph of 11.5.3.2: Early Depth
    * Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec.
    */
   if (key->stats_wm &&
       (lookup & BRW_WM_IZ_PS_KILL_ALPHATEST_BIT) &&
       wm_iz_table[lookup].mode == P) {
      kill_stats_promoted_workaround = true;
   }

   /* Masks and pixel X/Y coordinates for this subspan. */
   payload.subspan_coord_reg[0] = reg++;

   /* Interpolated source depth; also forced on by the workaround above. */
   if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth ||
       kill_stats_promoted_workaround) {
      payload.source_depth_reg[0] = reg;
      reg += 2;
   }

   if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround)
      source_depth_to_render_target = true;

   /* Antialiasing destination stencil.  With BRW_WM_AA_SOMETIMES its use
    * cannot be decided at compile time, so flag a runtime check.
    */
   if (wm_iz_table[lookup].ds_present || key->line_aa != BRW_WM_AA_NEVER) {
      payload.aa_dest_stencil_reg[0] = reg;
      runtime_check_aads_emit =
         !wm_iz_table[lookup].ds_present && key->line_aa == BRW_WM_AA_SOMETIMES;
      reg++;
   }

   /* Destination depth supplied by the windower. */
   if (wm_iz_table[lookup].dd_present) {
      payload.dest_depth_reg[0] = reg;
      reg+=2;
   }

   payload.num_regs = reg;
}

#undef P /* prompted depth */
#undef C /* computed */
#undef N /* non-promoted? */
/* Build the fragment shader thread payload layout for visitor \p v.
 * All register-index members are zero-initialized first, then the
 * generation-appropriate setup routine fills them in.  The two bool
 * out-parameters are fs_visitor state that payload setup may enable.
 */
fs_thread_payload::fs_thread_payload(const fs_visitor &v,
                                     bool &source_depth_to_render_target,
                                     bool &runtime_check_aads_emit)
   : subspan_coord_reg(),
     source_depth_reg(),
     source_w_reg(),
     aa_dest_stencil_reg(),
     dest_depth_reg(),
     sample_pos_reg(),
     sample_mask_in_reg(),
     depth_w_coef_reg(),
     barycentric_coord_reg(),
     local_invocation_id_reg()
{
   if (v.devinfo->ver >= 6)
      setup_fs_payload_gfx6(*this, v, source_depth_to_render_target);
   else
      setup_fs_payload_gfx4(*this, v, source_depth_to_render_target,
                            runtime_check_aads_emit);
}

View File

@@ -194,7 +194,7 @@ fs_visitor::emit_interpolation_setup_gfx4()
abld.ADD(offset(delta_xy, abld, 1), this->pixel_y, ystart);
}
this->pixel_z = fetch_payload_reg(bld, payload.source_depth_reg);
this->pixel_z = fetch_payload_reg(bld, fs_payload().source_depth_reg);
/* The SF program automatically handles doing the perspective correction or
* not based on wm_prog_data::interp_mode[] so we can use the same pixel
@@ -469,7 +469,7 @@ fs_visitor::emit_interpolation_setup_gfx6()
* pixels locations, here we recompute the Z value with 2 coefficients
* in X & Y axis.
*/
fs_reg coef_payload = fetch_payload_reg(abld, payload.depth_w_coef_reg, BRW_REGISTER_TYPE_F);
fs_reg coef_payload = fetch_payload_reg(abld, fs_payload().depth_w_coef_reg, BRW_REGISTER_TYPE_F);
const fs_reg x_start = brw_vec1_grf(coef_payload.nr, 2);
const fs_reg y_start = brw_vec1_grf(coef_payload.nr, 6);
const fs_reg z_cx = brw_vec1_grf(coef_payload.nr, 1);
@@ -507,19 +507,19 @@ fs_visitor::emit_interpolation_setup_gfx6()
if (wm_prog_data->uses_src_depth) {
assert(!wm_prog_data->uses_depth_w_coefficients);
this->pixel_z = fetch_payload_reg(bld, payload.source_depth_reg);
this->pixel_z = fetch_payload_reg(bld, fs_payload().source_depth_reg);
}
if (wm_prog_data->uses_src_w) {
abld = bld.annotate("compute pos.w");
this->pixel_w = fetch_payload_reg(abld, payload.source_w_reg);
this->pixel_w = fetch_payload_reg(abld, fs_payload().source_w_reg);
this->wpos_w = vgrf(glsl_type::float_type);
abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
}
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
this->delta_xy[i] = fetch_barycentric_reg(
bld, payload.barycentric_coord_reg[i]);
bld, fs_payload().barycentric_coord_reg[i]);
}
uint32_t centroid_modes = wm_prog_data->barycentric_interp_modes &
@@ -622,7 +622,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
/* Hand over gl_FragDepth or the payload depth. */
const fs_reg dst_depth = fetch_payload_reg(bld, payload.dest_depth_reg);
const fs_reg dst_depth = fetch_payload_reg(bld, fs_payload().dest_depth_reg);
fs_reg src_depth, src_stencil;
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
@@ -636,7 +636,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
* explicitly the pass-through case.
*/
assert(devinfo->ver <= 5);
src_depth = fetch_payload_reg(bld, payload.source_depth_reg);
src_depth = fetch_payload_reg(bld, fs_payload().source_depth_reg);
}
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
@@ -1214,7 +1214,6 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
init();
}
void
fs_visitor::init()
{
@@ -1233,7 +1232,7 @@ fs_visitor::init()
this->nir_ssa_values = NULL;
this->nir_system_values = NULL;
memset(&this->payload, 0, sizeof(this->payload));
this->payload_ = new thread_payload();
this->source_depth_to_render_target = false;
this->runtime_check_aads_emit = false;
this->first_non_payload_grf = 0;
@@ -1254,4 +1253,5 @@ fs_visitor::init()
fs_visitor::~fs_visitor()
{
delete this->payload_;
}

View File

@@ -2681,7 +2681,7 @@ fs_visitor::lower_logical_sends()
lower_fb_write_logical_send(ibld, inst,
brw_wm_prog_data(prog_data),
(const brw_wm_prog_key *)key,
payload);
fs_payload());
break;
case FS_OPCODE_FB_READ_LOGICAL:

View File

@@ -1211,9 +1211,9 @@ fs_visitor::nir_emit_task_mesh_intrinsic(const fs_builder &bld,
switch (instr->intrinsic) {
case nir_intrinsic_load_mesh_inline_data_intel:
assert(payload.num_regs == 3 || payload.num_regs == 4);
assert(payload().num_regs == 3 || payload().num_regs == 4);
/* Inline Parameter is the last element of the payload. */
bld.MOV(dest, retype(brw_vec1_grf(payload.num_regs - 1,
bld.MOV(dest, retype(brw_vec1_grf(payload().num_regs - 1,
nir_intrinsic_align_offset(instr)),
dest.type));
break;

View File

@@ -1420,7 +1420,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
return NULL;
}
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs;
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
fs_generator g(compiler, params->log_data, mem_ctx,

View File

@@ -2642,7 +2642,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
return NULL;
}
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs;
fs_generator g(compiler, params->log_data, mem_ctx,
&prog_data->base.base, v.runtime_check_aads_emit,

View File

@@ -823,7 +823,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
debug_enabled);
if (v.run_gs()) {
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs;
fs_generator g(compiler, params->log_data, mem_ctx,
&prog_data->base.base, false, MESA_SHADER_GEOMETRY);

View File

@@ -453,7 +453,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
return NULL;
}
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs;
fs_generator g(compiler, params->log_data, mem_ctx,
&prog_data->base.base, false, MESA_SHADER_TESS_CTRL);

View File

@@ -1,169 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "brw_fs.h"
#undef P /* prompted depth */
#undef C /* computed */
#undef N /* non-promoted? */
#define P 0
#define C 1
#define N 2
static const struct {
GLuint mode:2;
GLuint sd_present:1;
GLuint sd_to_rt:1;
GLuint dd_present:1;
GLuint ds_present:1;
} wm_iz_table[BRW_WM_IZ_BIT_MAX] =
{
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 0 },
{ N, 0, 1, 0, 0 },
{ N, 0, 1, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ C, 0, 1, 1, 0 },
{ C, 0, 1, 1, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 0 },
{ C, 0, 1, 1, 0 },
{ C, 0, 1, 1, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 0 },
{ N, 0, 1, 0, 0 },
{ N, 0, 1, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ C, 0, 1, 1, 0 },
{ C, 0, 1, 1, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 0 },
{ C, 0, 1, 1, 0 },
{ C, 0, 1, 1, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 1 },
{ N, 0, 1, 0, 1 },
{ N, 0, 1, 0, 1 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ C, 0, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 1 },
{ C, 0, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ P, 0, 0, 0, 0 },
{ C, 0, 0, 0, 1 },
{ P, 0, 0, 0, 0 },
{ C, 0, 1, 0, 1 },
{ P, 0, 0, 0, 0 },
{ C, 1, 1, 0, 1 },
{ C, 0, 1, 0, 1 },
{ C, 0, 1, 0, 1 },
{ P, 0, 0, 0, 0 },
{ C, 1, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ P, 0, 0, 0, 0 },
{ C, 1, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ C, 0, 1, 1, 1 }
};
/**
* \param line_aa BRW_WM_AA_NEVER, BRW_WM_AA_ALWAYS or BRW_WM_AA_SOMETIMES
* \param lookup bitmask of BRW_WM_IZ_* flags
*/
void fs_visitor::setup_fs_payload_gfx4()
{
assert(stage == MESA_SHADER_FRAGMENT);
assert(dispatch_width <= 16);
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
GLuint reg = 1;
bool kill_stats_promoted_workaround = false;
int lookup = key->iz_lookup;
assert(lookup < BRW_WM_IZ_BIT_MAX);
/* Crazy workaround in the windowizer, which we need to track in
* our register allocation and render target writes. See the "If
* statistics are enabled..." paragraph of 11.5.3.2: Early Depth
* Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec.
*/
if (key->stats_wm &&
(lookup & BRW_WM_IZ_PS_KILL_ALPHATEST_BIT) &&
wm_iz_table[lookup].mode == P) {
kill_stats_promoted_workaround = true;
}
payload.subspan_coord_reg[0] = reg++;
if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth ||
kill_stats_promoted_workaround) {
payload.source_depth_reg[0] = reg;
reg += 2;
}
if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround)
source_depth_to_render_target = true;
if (wm_iz_table[lookup].ds_present || key->line_aa != BRW_WM_AA_NEVER) {
payload.aa_dest_stencil_reg[0] = reg;
runtime_check_aads_emit =
!wm_iz_table[lookup].ds_present && key->line_aa == BRW_WM_AA_SOMETIMES;
reg++;
}
if (wm_iz_table[lookup].dd_present) {
payload.dest_depth_reg[0] = reg;
reg+=2;
}
payload.num_regs = reg;
}

View File

@@ -65,6 +65,7 @@ libintel_compiler_files = files(
'brw_fs_saturate_propagation.cpp',
'brw_fs_scoreboard.cpp',
'brw_fs_sel_peephole.cpp',
'brw_fs_thread_payload.cpp',
'brw_fs_validate.cpp',
'brw_fs_visitor.cpp',
'brw_inst.h',
@@ -139,7 +140,6 @@ libintel_compiler_files = files(
'brw_vec4_vs_visitor.cpp',
'brw_vec4_vs.h',
'brw_vue_map.c',
'brw_wm_iz.cpp',
'gfx6_gs_visitor.cpp',
'gfx6_gs_visitor.h',
)