intel/compiler: Use SHADER_OPCODE_SEND for PI messages

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21094>
This commit is contained in:
Jason Ekstrand
2021-11-19 17:57:42 -06:00
committed by Marge Bot
parent 9c62e0c77d
commit 714a291673
7 changed files with 79 additions and 83 deletions

View File

@@ -1671,9 +1671,13 @@ brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
unsigned msg_type,
bool noperspective,
bool coarse_pixel_rate,
unsigned simd_mode,
unsigned slot_group)
unsigned exec_size,
unsigned group)
{
assert(exec_size == 8 || exec_size == 16);
const bool simd_mode = exec_size == 16;
const bool slot_group = group >= 16;
assert(devinfo->ver >= 10 || !coarse_pixel_rate);
return (SET_BITS(slot_group, 11, 11) |
SET_BITS(msg_type, 13, 12) |

View File

@@ -3345,38 +3345,6 @@ brw_memory_fence(struct brw_codegen *p,
brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
}
/**
 * Emit a message to the pixel interpolator shared function.
 *
 * The message descriptor is assembled from two parts: the generic
 * length bits (msg_length / response_length, no header) and the
 * pixel-interpolator specific bits derived from \p mode,
 * \p noperspective, \p coarse_pixel_rate and the codegen's current
 * default execution size and channel group.
 *
 * \param p                  codegen state; supplies devinfo and defaults
 * \param dest               destination register of the send
 * \param mrf                payload register of the send
 * \param noperspective      forwarded to brw_pixel_interp_desc()
 * \param coarse_pixel_rate  forwarded to brw_pixel_interp_desc()
 * \param mode               interpolator message type
 * \param data               message data; only its first component
 *                           (vec1) is passed on, and it may be an
 *                           immediate
 * \param msg_length         message length for the descriptor
 * \param response_length    response length for the descriptor
 */
void
brw_pixel_interpolator_query(struct brw_codegen *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             bool noperspective,
                             bool coarse_pixel_rate,
                             unsigned mode,
                             struct brw_reg data,
                             unsigned msg_length,
                             unsigned response_length)
{
   const struct intel_device_info *devinfo = p->devinfo;

   /* Pull the SIMD mode and slot group from the current defaults. */
   const uint16_t default_exec_size = brw_get_default_exec_size(p);
   const unsigned group_index = brw_get_default_group(p) / 16;
   const unsigned is_simd16 = (default_exec_size == BRW_EXECUTE_16);

   /* Generic length bits and PI-specific bits are OR'ed together. */
   const unsigned length_bits =
      brw_message_desc(devinfo, msg_length, response_length, false);
   const unsigned pi_bits =
      brw_pixel_interp_desc(devinfo, mode, noperspective,
                            coarse_pixel_rate, is_simd16, group_index);
   const unsigned desc = length_bits | pi_bits;

   /* An immediate `data` makes brw_send_indirect_message fall back to a
    * direct send on its own, so no special casing is needed here.
    */
   brw_send_indirect_message(p, GFX7_SFID_PIXEL_INTERPOLATOR,
                             dest, mrf, vec1(data), desc, false);
}
void
brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, bool last)
{

View File

@@ -625,12 +625,6 @@ private:
struct brw_reg dst,
struct brw_reg index);
/* Emits a pixel interpolator query for `inst`: `dst` receives the
 * response, `src` is the message payload (ignored when the instruction
 * carries no payload), `msg_data` is the UD-typed message data and
 * `msg_type` selects the interpolator message type.
 */
void generate_pixel_interpolator_query(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src,
struct brw_reg msg_data,
unsigned msg_type);
void generate_set_sample_id(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,

View File

@@ -1611,31 +1611,6 @@ fs_generator::generate_varying_pull_constant_load_gfx4(fs_inst *inst,
msg_type, simd_mode, return_format));
}
/**
 * Generate a pixel interpolator query for \p inst.
 *
 * When inst->src[0] is a real register it is used as the payload and the
 * message length is 2 * exec_size / 8; otherwise a placeholder register
 * is sent with a message length of 1.  The response length is taken from
 * inst->size_written, which must be a whole number of registers.
 *
 * \param inst      instruction being generated
 * \param dst       destination; retyped to UW for the send
 * \param src       payload register (used only when a payload exists)
 * \param msg_data  message data, must be of type UD
 * \param msg_type  interpolator message type
 */
void
fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
                                                struct brw_reg dst,
                                                struct brw_reg src,
                                                struct brw_reg msg_data,
                                                unsigned msg_type)
{
   assert(msg_data.type == BRW_REGISTER_TYPE_UD);
   assert(inst->size_written % REG_SIZE == 0);

   struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(this->prog_data);

   /* If we don't have a payload, what we send doesn't matter */
   struct brw_reg payload = brw_vec8_grf(0, 0);
   unsigned msg_length = 1;
   if (inst->src[0].file != BAD_FILE) {
      payload = src;
      msg_length = 2 * inst->exec_size / 8;
   }

   const unsigned response_length = inst->size_written / REG_SIZE;

   brw_pixel_interpolator_query(p,
                                retype(dst, BRW_REGISTER_TYPE_UW),
                                payload,
                                inst->pi_noperspective,
                                wm_prog_data->per_coarse_pixel_dispatch,
                                msg_type,
                                msg_data,
                                msg_length,
                                response_length);
}
/* Sets vstride=1, width=4, hstride=0 of register src1 during
* the ADD instruction.
*/
@@ -2389,24 +2364,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
}
break;
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
generate_pixel_interpolator_query(inst, dst, src[0], src[1],
GFX7_PIXEL_INTERPOLATOR_LOC_SAMPLE);
send_count++;
break;
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
generate_pixel_interpolator_query(inst, dst, src[0], src[1],
GFX7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET);
send_count++;
break;
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
generate_pixel_interpolator_query(inst, dst, src[0], src[1],
GFX7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET);
send_count++;
break;
case CS_OPCODE_CS_TERMINATE:
generate_cs_terminate(inst, src[0]);
send_count++;

View File

@@ -1107,6 +1107,13 @@ namespace {
abort();
}
case GFX7_SFID_PIXEL_INTERPOLATOR:
if (devinfo->ver >= 7)
return calculate_desc(info, EU_UNIT_PI, 2, 0, 0, 14 /* XXX */, 0,
0, 90 /* XXX */, 0, 0, 0, 0);
else
abort();
case GFX12_SFID_UGM:
case GFX12_SFID_TGM:
case GFX12_SFID_SLM:

View File

@@ -2446,6 +2446,61 @@ lower_math_logical_send(const fs_builder &bld, fs_inst *inst)
}
}
/**
 * Rewrite a logical pixel-interpolator instruction, in place, into a
 * SHADER_OPCODE_SEND aimed at GFX7_SFID_PIXEL_INTERPOLATOR.
 *
 * The interpolator message type follows from the opcode:
 *  - FS_OPCODE_INTERPOLATE_AT_SAMPLE and
 *    FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET carry no payload (src[0] must
 *    be BAD_FILE); a placeholder register with mlen 1 is sent instead.
 *  - FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET sends src[0] with
 *    mlen = 2 * exec_size / 8.
 *
 * The original src[1] becomes the send's descriptor source (src[0]), the
 * extended descriptor is an immediate zero, and the payload lands in
 * src[2].
 *
 * \param bld           builder; only used to reach devinfo
 * \param inst          instruction to rewrite
 * \param wm_prog_data  provides per_coarse_pixel_dispatch for the descriptor
 */
static void
lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
                                const struct brw_wm_prog_data *wm_prog_data)
{
   const intel_device_info *devinfo = bld.shader->devinfo;

   /* We have to send something, so start from a placeholder payload. */
   fs_reg payload = brw_vec8_grf(0, 0);
   unsigned mlen = 1;

   /* Capture the descriptor source before src[] is rearranged below. */
   const fs_reg desc = inst->src[1];

   unsigned mode;
   if (inst->opcode == FS_OPCODE_INTERPOLATE_AT_SAMPLE) {
      assert(inst->src[0].file == BAD_FILE);
      mode = GFX7_PIXEL_INTERPOLATOR_LOC_SAMPLE;
   } else if (inst->opcode == FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET) {
      assert(inst->src[0].file == BAD_FILE);
      mode = GFX7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET;
   } else if (inst->opcode == FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET) {
      /* Only this variant has a real payload. */
      payload = inst->src[0];
      mlen = 2 * inst->exec_size / 8;
      mode = GFX7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET;
   } else {
      unreachable("Invalid interpolator instruction");
   }

   const uint32_t desc_imm =
      brw_pixel_interp_desc(devinfo, mode, inst->pi_noperspective,
                            wm_prog_data->per_coarse_pixel_dispatch,
                            inst->exec_size, inst->group);

   assert(devinfo->ver >= 7);

   /* Turn the instruction into a plain send. */
   inst->opcode = SHADER_OPCODE_SEND;
   inst->sfid = GFX7_SFID_PIXEL_INTERPOLATOR;
   inst->desc = desc_imm;
   inst->ex_desc = 0;
   inst->mlen = mlen;
   inst->ex_mlen = 0;
   inst->send_has_side_effects = false;
   inst->send_is_volatile = false;

   /* Send sources: descriptor, extended descriptor, payload. */
   inst->resize_sources(3);
   inst->src[0] = desc;
   inst->src[1] = brw_imm_ud(0); /* ex_desc */
   inst->src[2] = payload;
}
static void
lower_btd_logical_send(const fs_builder &bld, fs_inst *inst)
{
@@ -2749,6 +2804,13 @@ fs_visitor::lower_logical_sends()
continue;
}
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
lower_interpolator_logical_send(ibld, inst,
brw_wm_prog_data(prog_data));
break;
case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
lower_btd_logical_send(ibld, inst);

View File

@@ -532,6 +532,10 @@ schedule_node::set_latency_gfx7(bool is_haswell)
}
break;
case GFX7_SFID_PIXEL_INTERPOLATOR:
latency = 50; /* TODO */
break;
case GFX12_SFID_UGM:
case GFX12_SFID_TGM:
case GFX12_SFID_SLM: