intel/fs: Extend thread payload layout to SIMD32
And handle 32-wide payload register reads in fetch_payload_reg().

v2 (Jason Ekstrand):
 - Fix some whitespace and brace placement

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:

committed by
Jason Ekstrand

parent
8f143f70d6
commit
f6c4aace22
@@ -4050,12 +4050,12 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
|||||||
assert(length == 0 || length == 2);
|
assert(length == 0 || length == 2);
|
||||||
header_size = length;
|
header_size = length;
|
||||||
|
|
||||||
if (payload.aa_dest_stencil_reg) {
|
if (payload.aa_dest_stencil_reg[0]) {
|
||||||
assert(inst->group < 16);
|
assert(inst->group < 16);
|
||||||
sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1));
|
sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1));
|
||||||
bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
|
bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
|
||||||
.MOV(sources[length],
|
.MOV(sources[length],
|
||||||
fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
|
fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg[0], 0)));
|
||||||
length++;
|
length++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -6054,7 +6054,7 @@ fs_visitor::setup_fs_payload_gen6()
|
|||||||
*/
|
*/
|
||||||
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
|
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
|
||||||
if (prog_data->barycentric_interp_modes & (1 << i)) {
|
if (prog_data->barycentric_interp_modes & (1 << i)) {
|
||||||
payload.barycentric_coord_reg[i] = payload.num_regs;
|
payload.barycentric_coord_reg[i][0] = payload.num_regs;
|
||||||
payload.num_regs += 2;
|
payload.num_regs += 2;
|
||||||
if (dispatch_width == 16) {
|
if (dispatch_width == 16) {
|
||||||
payload.num_regs += 2;
|
payload.num_regs += 2;
|
||||||
@@ -6066,7 +6066,7 @@ fs_visitor::setup_fs_payload_gen6()
|
|||||||
prog_data->uses_src_depth =
|
prog_data->uses_src_depth =
|
||||||
(nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
|
(nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
|
||||||
if (prog_data->uses_src_depth) {
|
if (prog_data->uses_src_depth) {
|
||||||
payload.source_depth_reg = payload.num_regs;
|
payload.source_depth_reg[0] = payload.num_regs;
|
||||||
payload.num_regs++;
|
payload.num_regs++;
|
||||||
if (dispatch_width == 16) {
|
if (dispatch_width == 16) {
|
||||||
/* R28: interpolated depth if not SIMD8. */
|
/* R28: interpolated depth if not SIMD8. */
|
||||||
@@ -6078,7 +6078,7 @@ fs_visitor::setup_fs_payload_gen6()
|
|||||||
prog_data->uses_src_w =
|
prog_data->uses_src_w =
|
||||||
(nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
|
(nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
|
||||||
if (prog_data->uses_src_w) {
|
if (prog_data->uses_src_w) {
|
||||||
payload.source_w_reg = payload.num_regs;
|
payload.source_w_reg[0] = payload.num_regs;
|
||||||
payload.num_regs++;
|
payload.num_regs++;
|
||||||
if (dispatch_width == 16) {
|
if (dispatch_width == 16) {
|
||||||
/* R30: interpolated W if not SIMD8. */
|
/* R30: interpolated W if not SIMD8. */
|
||||||
@@ -6099,7 +6099,7 @@ fs_visitor::setup_fs_payload_gen6()
|
|||||||
* persample dispatch, we hard-code it to 0.5.
|
* persample dispatch, we hard-code it to 0.5.
|
||||||
*/
|
*/
|
||||||
prog_data->uses_pos_offset = true;
|
prog_data->uses_pos_offset = true;
|
||||||
payload.sample_pos_reg = payload.num_regs;
|
payload.sample_pos_reg[0] = payload.num_regs;
|
||||||
payload.num_regs++;
|
payload.num_regs++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -6108,7 +6108,7 @@ fs_visitor::setup_fs_payload_gen6()
|
|||||||
(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) != 0;
|
(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) != 0;
|
||||||
if (prog_data->uses_sample_mask) {
|
if (prog_data->uses_sample_mask) {
|
||||||
assert(devinfo->gen >= 7);
|
assert(devinfo->gen >= 7);
|
||||||
payload.sample_mask_in_reg = payload.num_regs;
|
payload.sample_mask_in_reg[0] = payload.num_regs;
|
||||||
payload.num_regs++;
|
payload.num_regs++;
|
||||||
if (dispatch_width == 16) {
|
if (dispatch_width == 16) {
|
||||||
/* R33: input coverage mask if not SIMD8. */
|
/* R33: input coverage mask if not SIMD8. */
|
||||||
|
@@ -338,14 +338,15 @@ public:
|
|||||||
|
|
||||||
/** Register numbers for thread payload fields. */
|
/** Register numbers for thread payload fields. */
|
||||||
struct thread_payload {
|
struct thread_payload {
|
||||||
uint8_t source_depth_reg;
|
uint8_t subspan_coord_reg[2];
|
||||||
uint8_t source_w_reg;
|
uint8_t source_depth_reg[2];
|
||||||
uint8_t aa_dest_stencil_reg;
|
uint8_t source_w_reg[2];
|
||||||
uint8_t dest_depth_reg;
|
uint8_t aa_dest_stencil_reg[2];
|
||||||
uint8_t sample_pos_reg;
|
uint8_t dest_depth_reg[2];
|
||||||
uint8_t sample_mask_in_reg;
|
uint8_t sample_pos_reg[2];
|
||||||
uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT];
|
uint8_t sample_mask_in_reg[2];
|
||||||
uint8_t local_invocation_id_reg;
|
uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2];
|
||||||
|
uint8_t local_invocation_id_reg[2];
|
||||||
|
|
||||||
/** The number of thread payload registers the hardware will supply. */
|
/** The number of thread payload registers the hardware will supply. */
|
||||||
uint8_t num_regs;
|
uint8_t num_regs;
|
||||||
@@ -499,13 +500,32 @@ private:
|
|||||||
|
|
||||||
namespace brw {
|
namespace brw {
|
||||||
inline fs_reg
|
inline fs_reg
|
||||||
fetch_payload_reg(const brw::fs_builder &bld, uint8_t reg,
|
fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
|
||||||
brw_reg_type type = BRW_REGISTER_TYPE_F, unsigned n = 1)
|
brw_reg_type type = BRW_REGISTER_TYPE_F, unsigned n = 1)
|
||||||
{
|
{
|
||||||
if (!reg) {
|
if (!regs[0])
|
||||||
return fs_reg();
|
return fs_reg();
|
||||||
|
|
||||||
|
if (bld.dispatch_width() > 16) {
|
||||||
|
const fs_reg tmp = bld.vgrf(type, n);
|
||||||
|
const brw::fs_builder hbld = bld.exec_all().group(16, 0);
|
||||||
|
const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
|
||||||
|
fs_reg *const components = new fs_reg[n * m];
|
||||||
|
|
||||||
|
for (unsigned c = 0; c < n; c++) {
|
||||||
|
for (unsigned g = 0; g < m; g++) {
|
||||||
|
components[c * m + g] =
|
||||||
|
offset(retype(brw_vec8_grf(regs[g], 0), type), hbld, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
hbld.LOAD_PAYLOAD(tmp, components, n * m, 0);
|
||||||
|
|
||||||
|
delete[] components;
|
||||||
|
return tmp;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
return fs_reg(retype(brw_vec8_grf(reg, 0), type));
|
return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -122,9 +122,10 @@ static const struct {
|
|||||||
void fs_visitor::setup_fs_payload_gen4()
|
void fs_visitor::setup_fs_payload_gen4()
|
||||||
{
|
{
|
||||||
assert(stage == MESA_SHADER_FRAGMENT);
|
assert(stage == MESA_SHADER_FRAGMENT);
|
||||||
|
assert(dispatch_width <= 16);
|
||||||
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
|
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
|
||||||
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
|
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
|
||||||
GLuint reg = 2;
|
GLuint reg = 1;
|
||||||
bool kill_stats_promoted_workaround = false;
|
bool kill_stats_promoted_workaround = false;
|
||||||
int lookup = key->iz_lookup;
|
int lookup = key->iz_lookup;
|
||||||
|
|
||||||
@@ -141,11 +142,13 @@ void fs_visitor::setup_fs_payload_gen4()
|
|||||||
kill_stats_promoted_workaround = true;
|
kill_stats_promoted_workaround = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
payload.subspan_coord_reg[0] = reg++;
|
||||||
|
|
||||||
prog_data->uses_src_depth =
|
prog_data->uses_src_depth =
|
||||||
(nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
|
(nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
|
||||||
if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth ||
|
if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth ||
|
||||||
kill_stats_promoted_workaround) {
|
kill_stats_promoted_workaround) {
|
||||||
payload.source_depth_reg = reg;
|
payload.source_depth_reg[0] = reg;
|
||||||
reg += 2;
|
reg += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -153,14 +156,14 @@ void fs_visitor::setup_fs_payload_gen4()
|
|||||||
source_depth_to_render_target = true;
|
source_depth_to_render_target = true;
|
||||||
|
|
||||||
if (wm_iz_table[lookup].ds_present || key->line_aa != BRW_WM_AA_NEVER) {
|
if (wm_iz_table[lookup].ds_present || key->line_aa != BRW_WM_AA_NEVER) {
|
||||||
payload.aa_dest_stencil_reg = reg;
|
payload.aa_dest_stencil_reg[0] = reg;
|
||||||
runtime_check_aads_emit =
|
runtime_check_aads_emit =
|
||||||
!wm_iz_table[lookup].ds_present && key->line_aa == BRW_WM_AA_SOMETIMES;
|
!wm_iz_table[lookup].ds_present && key->line_aa == BRW_WM_AA_SOMETIMES;
|
||||||
reg++;
|
reg++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (wm_iz_table[lookup].dd_present) {
|
if (wm_iz_table[lookup].dd_present) {
|
||||||
payload.dest_depth_reg = reg;
|
payload.dest_depth_reg[0] = reg;
|
||||||
reg+=2;
|
reg+=2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user