intel/elk: Remove ex_desc and ex_mlen from elk_inst

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27629>
Author: Caio Oliveira
Date: 2024-02-10 18:58:01 -08:00
Committed by: Marge Bot
Parent: 75e13ac705
Commit: ff64e68ef4
7 changed files with 41 additions and 219 deletions
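The elk backend targets pre-Gfx9 hardware, which has no split sends (SENDS): the removed paths below are all guarded by devinfo->ver >= 9 (or >= 11) checks or marked "no split sends available", so the extended descriptor and extended message length were always zero. Every lowered SEND now carries a single descriptor immediate and a single payload. As a rough sketch of that pattern (the helper below is hypothetical and not part of this commit; the fields and calls are the ones that appear in the diff):

/* Hypothetical illustration only -- not part of this commit. */
static void
setup_single_payload_send(elk_fs_inst *inst, unsigned sfid, uint32_t desc,
                          const elk_fs_reg &payload, unsigned mlen)
{
   inst->opcode = ELK_SHADER_OPCODE_SEND;
   inst->sfid = sfid;
   inst->desc = desc;            /* immediate message descriptor */
   inst->mlen = mlen;            /* payload length in GRFs; no ex_mlen */
   inst->resize_sources(3);
   inst->src[0] = elk_imm_ud(0); /* descriptor register; 0 when inst->desc holds it all */
   inst->src[1] = elk_imm_ud(0); /* former ex_desc slot, kept zero */
   inst->src[2] = payload;       /* the single message payload */
}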


@@ -850,8 +850,6 @@ elk_fs_inst::size_read(int arg) const
    case ELK_SHADER_OPCODE_SEND:
       if (arg == 2) {
          return mlen * REG_SIZE;
-      } else if (arg == 3) {
-         return ex_mlen * REG_SIZE;
       }
       break;
@@ -2733,10 +2731,6 @@ elk_fs_visitor::opt_zero_samples()
       if (send->keep_payload_trailing_zeros)
          continue;
 
-      /* This pass works on SENDs before splitting. */
-      if (send->ex_mlen > 0)
-         continue;
-
       elk_fs_inst *lp = (elk_fs_inst *) send->prev;
 
       if (lp->is_head_sentinel() || lp->opcode != ELK_SHADER_OPCODE_LOAD_PAYLOAD)
@@ -5518,10 +5512,6 @@ elk_fs_visitor::dump_instruction_to_file(const elk_backend_instruction *be_inst,
       fprintf(file, "(mlen: %d) ", inst->mlen);
    }
-   if (inst->ex_mlen) {
-      fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen);
-   }
    if (inst->eot) {
      fprintf(file, "(EOT) ");
    }


@@ -467,9 +467,7 @@ private:
    void generate_send(elk_fs_inst *inst,
                       struct elk_reg dst,
                       struct elk_reg desc,
-                      struct elk_reg ex_desc,
-                      struct elk_reg payload,
-                      struct elk_reg payload2);
+                      struct elk_reg payload);
    void generate_fb_write(elk_fs_inst *inst, struct elk_reg payload);
    void generate_cs_terminate(elk_fs_inst *inst, struct elk_reg payload);
    void generate_barrier(elk_fs_inst *inst, struct elk_reg src);


@@ -185,7 +185,6 @@ instructions_match(elk_fs_inst *a, elk_fs_inst *b, bool *negate)
           a->dst.type == b->dst.type &&
           a->offset == b->offset &&
           a->mlen == b->mlen &&
-          a->ex_mlen == b->ex_mlen &&
           a->sfid == b->sfid &&
           a->desc == b->desc &&
           a->size_written == b->size_written &&


@@ -321,11 +321,9 @@ elk_fs_generator::patch_halt_jumps()
 void
 elk_fs_generator::generate_send(elk_fs_inst *inst,
-                                struct elk_reg dst,
-                                struct elk_reg desc,
-                                struct elk_reg ex_desc,
-                                struct elk_reg payload,
-                                struct elk_reg payload2)
+                                struct elk_reg dst,
+                                struct elk_reg desc,
+                                struct elk_reg payload)
 {
    const bool dst_is_null = dst.file == ELK_ARCHITECTURE_REGISTER_FILE &&
                             dst.nr == ELK_ARF_NULL;
@@ -334,18 +332,10 @@ elk_fs_generator::generate_send(elk_fs_inst *inst,
    uint32_t desc_imm = inst->desc |
       elk_message_desc(devinfo, inst->mlen, rlen, inst->header_size);
 
-   uint32_t ex_desc_imm = inst->ex_desc |
-      elk_message_ex_desc(devinfo, inst->ex_mlen);
-
-   if (ex_desc.file != ELK_IMMEDIATE_VALUE || ex_desc.ud || ex_desc_imm ||
-       inst->send_ex_desc_scratch) {
-      unreachable("no split sends available");
-   } else {
-      elk_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm,
-                                inst->eot);
-      if (inst->check_tdr)
-         elk_inst_set_opcode(p->isa, elk_last_inst, ELK_OPCODE_SENDC);
-   }
+   elk_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm,
+                             inst->eot);
+   if (inst->check_tdr)
+      elk_inst_set_opcode(p->isa, elk_last_inst, ELK_OPCODE_SENDC);
 }
 
 void
@@ -1960,8 +1950,7 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
          break;
 
      case ELK_SHADER_OPCODE_SEND:
-         generate_send(inst, dst, src[0], src[1], src[2],
-                       inst->ex_mlen > 0 ? src[3] : elk_null_reg());
+         generate_send(inst, dst, src[0], src[2]);
         send_count++;
         break;


@@ -604,25 +604,6 @@ elk_fs_reg_alloc::setup_inst_interference(const elk_fs_inst *inst)
                                  grf127_send_hack_node);
    }
 
-   /* From the Skylake PRM Vol. 2a docs for sends:
-    *
-    *    "It is required that the second block of GRFs does not overlap with
-    *     the first block."
-    *
-    * Normally, this is taken care of by fixup_sends_duplicate_payload() but
-    * in the case where one of the registers is an undefined value, the
-    * register allocator may decide that they don't interfere even though
-    * they're used as sources in the same instruction. We also need to add
-    * interference here.
-    */
-   if (devinfo->ver >= 9) {
-      if (inst->opcode == ELK_SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
-          inst->src[2].file == VGRF && inst->src[3].file == VGRF &&
-          inst->src[2].nr != inst->src[3].nr)
-         ra_add_node_interference(g, first_vgrf_node + inst->src[2].nr,
-                                  first_vgrf_node + inst->src[3].nr);
-   }
-
    /* When we do send-from-GRF for FB writes, we need to ensure that the last
    * write instruction sends from a high register. This is because the
    * vertex fetcher wants to start filling the low payload registers while
@@ -652,12 +633,6 @@ elk_fs_reg_alloc::setup_inst_interference(const elk_fs_inst *inst)
       }
 
       ra_set_node_reg(g, first_vgrf_node + vgrf, reg);
 
-      if (inst->ex_mlen > 0) {
-         const int vgrf = inst->src[3].nr;
-         reg -= DIV_ROUND_UP(fs->alloc.sizes[vgrf], reg_unit(devinfo));
-         ra_set_node_reg(g, first_vgrf_node + vgrf, reg);
-      }
    }
 }
@@ -880,7 +855,6 @@ elk_fs_reg_alloc::emit_spill(const fs_builder &bld,
                              elk_fs_reg src,
                              uint32_t spill_offset, unsigned count, int ip)
 {
-   const intel_device_info *devinfo = bld.shader->devinfo;
    const unsigned reg_size = src.component_size(bld.dispatch_width()) /
                              REG_SIZE;
    assert(count % reg_size == 0);
@@ -888,39 +862,11 @@ elk_fs_reg_alloc::emit_spill(const fs_builder &bld,
    for (unsigned i = 0; i < count / reg_size; i++) {
       ++stats->spill_count;
 
-      elk_fs_inst *spill_inst;
-      if (devinfo->ver >= 9) {
-         elk_fs_reg header = this->scratch_header;
-         fs_builder ubld = bld.exec_all().group(1, 0);
-         assert(spill_offset % 16 == 0);
-         spill_inst = ubld.MOV(component(header, 2),
-                               elk_imm_ud(spill_offset / 16));
-         _mesa_set_add(spill_insts, spill_inst);
-
-         const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
-         const elk_fs_reg ex_desc = elk_imm_ud(0);
-         elk_fs_reg srcs[] = { elk_imm_ud(0), ex_desc, header, src };
-         spill_inst = bld.emit(ELK_SHADER_OPCODE_SEND, bld.null_reg_f(),
-                               srcs, ARRAY_SIZE(srcs));
-         spill_inst->mlen = 1;
-         spill_inst->ex_mlen = reg_size;
-         spill_inst->size_written = 0;
-         spill_inst->header_size = 1;
-         spill_inst->send_has_side_effects = true;
-         spill_inst->send_is_volatile = false;
-         spill_inst->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
-         spill_inst->desc =
-            elk_dp_desc(devinfo, bti,
-                        GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE,
-                        ELK_DATAPORT_OWORD_BLOCK_DWORDS(reg_size * 8));
-      } else {
-         spill_inst = bld.emit(ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE,
-                               bld.null_reg_f(), src);
-         spill_inst->offset = spill_offset;
-         spill_inst->mlen = 1 + reg_size; /* header, value */
-         spill_inst->base_mrf = spill_base_mrf(bld.shader);
-      }
+      elk_fs_inst *spill_inst = bld.emit(ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE,
+                                         bld.null_reg_f(), src);
+      spill_inst->offset = spill_offset;
+      spill_inst->mlen = 1 + reg_size; /* header, value */
+      spill_inst->base_mrf = spill_base_mrf(bld.shader);
       _mesa_set_add(spill_insts, spill_inst);
 
       src.offset += reg_size * REG_SIZE;


@@ -158,12 +158,10 @@ struct elk_backend_instruction {
    uint32_t offset; /**< spill/unspill offset or texture offset bitfield */
    uint8_t mlen; /**< SEND message length */
-   uint8_t ex_mlen; /**< SENDS extended message length */
    int8_t base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
    uint8_t target; /**< MRT target. */
    uint8_t sfid; /**< SFID for SEND instructions */
    uint32_t desc; /**< SEND[S] message descriptor immediate */
-   uint32_t ex_desc; /**< SEND[S] extended message descriptor immediate */
    unsigned size_written; /**< Data written to the destination register in bytes. */
    enum elk_opcode opcode; /* ELK_OPCODE_* or ELK_FS_OPCODE_* */
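With a single descriptor immediate left in elk_backend_instruction, the full descriptor for a SEND is just that value combined with the generic length fields, which is what the generator hunk above does via elk_message_desc(). A minimal sketch, assuming the same helper and passing the response length explicitly as generate_send() does (this function itself is illustrative, not from the tree):

/* Illustrative only: how the remaining fields describe a SEND message. */
static uint32_t
full_send_desc(const struct intel_device_info *devinfo,
               const elk_fs_inst *inst, unsigned response_len)
{
   /* inst->desc carries the message-specific bits; mlen, response_len and
    * header_size fill in the generic message-length fields. */
   return inst->desc |
          elk_message_desc(devinfo, inst->mlen, response_len,
                           inst->header_size);
}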


@@ -62,8 +62,6 @@ lower_urb_read_logical_send(const fs_builder &bld, elk_fs_inst *inst)
                             inst->offset);
    inst->mlen = header_size;
-   inst->ex_desc = 0;
-   inst->ex_mlen = 0;
    inst->send_is_volatile = true;
 
    inst->resize_sources(4);
@@ -124,7 +122,6 @@ lower_urb_read_logical_send_xe2(const fs_builder &bld, elk_fs_inst *inst)
    /* Update the original instruction. */
    inst->opcode = ELK_SHADER_OPCODE_SEND;
    inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
-   inst->ex_mlen = 0;
    inst->header_size = 0;
    inst->send_has_side_effects = true;
    inst->send_is_volatile = false;
@@ -183,8 +180,6 @@ lower_urb_write_logical_send(const fs_builder &bld, elk_fs_inst *inst)
                             inst->offset);
    inst->mlen = length;
-   inst->ex_desc = 0;
-   inst->ex_mlen = 0;
    inst->send_has_side_effects = true;
 
    inst->resize_sources(4);
@@ -209,7 +204,6 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, elk_fs_inst *inst)
    /* Calculate the total number of components of the payload. */
    const unsigned src_comps = MAX2(1, inst->components_read(URB_LOGICAL_SRC_DATA));
    const unsigned src_sz = type_sz(src.type);
 
    elk_fs_reg payload = bld.vgrf(ELK_REGISTER_TYPE_UD);
@@ -240,7 +234,6 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, elk_fs_inst *inst)
    }
 
    elk_fs_reg payload2 = bld.move_to_vgrf(src, src_comps);
-   const unsigned ex_mlen = (src_comps * src_sz * inst->exec_size) / REG_SIZE;
 
    inst->sfid = ELK_SFID_URB;
@@ -257,7 +250,6 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, elk_fs_inst *inst)
    /* Update the original instruction. */
    inst->opcode = ELK_SHADER_OPCODE_SEND;
    inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
-   inst->ex_mlen = ex_mlen;
    inst->header_size = 0;
    inst->send_has_side_effects = true;
    inst->send_is_volatile = false;
@@ -520,18 +512,6 @@ lower_fb_write_logical_send(const fs_builder &bld, elk_fs_inst *inst,
       desc = component(desc, 0);
    }
 
-   uint32_t ex_desc = 0;
-   if (devinfo->ver >= 11) {
-      /* Set the "Render Target Index" and "Src0 Alpha Present" fields
-       * in the extended message descriptor, in lieu of using a header.
-       */
-      ex_desc = inst->target << 12 | (src0_alpha.file != BAD_FILE) << 15;
-
-      if (key->nr_color_regions == 0)
-         ex_desc |= 1 << 20; /* Null Render Target */
-   }
-   inst->ex_desc = ex_desc;
-
    inst->opcode = ELK_SHADER_OPCODE_SEND;
    inst->resize_sources(3);
    inst->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE;
@@ -539,7 +519,6 @@ lower_fb_write_logical_send(const fs_builder &bld, elk_fs_inst *inst,
    inst->src[1] = elk_imm_ud(0);
    inst->src[2] = payload;
    inst->mlen = regs_written(load);
-   inst->ex_mlen = 0;
    inst->header_size = header_size;
    inst->check_tdr = true;
    inst->send_has_side_effects = true;
@@ -1291,8 +1270,6 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, elk_fs_inst *inst, elk_op
       inst->src[1] = elk_imm_ud(0); /* ex_desc */
    }
-   inst->ex_desc = 0;
-
    inst->src[2] = src_payload;
    inst->resize_sources(3);
@@ -1547,44 +1524,28 @@ lower_surface_logical_send(const fs_builder &bld, elk_fs_inst *inst)
    const unsigned header_sz = header.file != BAD_FILE ? 1 : 0;
 
-   elk_fs_reg payload, payload2;
-   unsigned mlen, ex_mlen = 0;
-   if (devinfo->ver >= 9 &&
-       (src.file == BAD_FILE || header.file == BAD_FILE)) {
-      /* We have split sends on gfx9 and above */
-      if (header.file == BAD_FILE) {
-         payload = bld.move_to_vgrf(addr, addr_sz);
-         payload2 = bld.move_to_vgrf(src, src_sz);
-         mlen = addr_sz * (inst->exec_size / 8);
-         ex_mlen = src_sz * (inst->exec_size / 8);
-      } else {
-         assert(src.file == BAD_FILE);
-         payload = header;
-         payload2 = bld.move_to_vgrf(addr, addr_sz);
-         mlen = header_sz;
-         ex_mlen = addr_sz * (inst->exec_size / 8);
-      }
-   } else {
-      /* Allocate space for the payload. */
-      const unsigned sz = header_sz + addr_sz + src_sz;
-      payload = bld.vgrf(ELK_REGISTER_TYPE_UD, sz);
-      elk_fs_reg *const components = new elk_fs_reg[sz];
-      unsigned n = 0;
+   elk_fs_reg payload;
+   unsigned mlen;
 
-      /* Construct the payload. */
-      if (header.file != BAD_FILE)
-         components[n++] = header;
+   /* Allocate space for the payload. */
+   const unsigned sz = header_sz + addr_sz + src_sz;
+   payload = bld.vgrf(ELK_REGISTER_TYPE_UD, sz);
+   elk_fs_reg *const components = new elk_fs_reg[sz];
+   unsigned n = 0;
 
-      for (unsigned i = 0; i < addr_sz; i++)
-         components[n++] = offset(addr, bld, i);
+   /* Construct the payload. */
+   if (header.file != BAD_FILE)
+      components[n++] = header;
 
-      for (unsigned i = 0; i < src_sz; i++)
-         components[n++] = offset(src, bld, i);
+   for (unsigned i = 0; i < addr_sz; i++)
+      components[n++] = offset(addr, bld, i);
 
-      bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
-      mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
+   for (unsigned i = 0; i < src_sz; i++)
+      components[n++] = offset(src, bld, i);
 
-      delete[] components;
-   }
+   bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
+   mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
+
+   delete[] components;
 
    /* Predicate the instruction on the sample mask if no header is
    * provided.
@@ -1704,7 +1665,6 @@ lower_surface_logical_send(const fs_builder &bld, elk_fs_inst *inst)
    /* Update the original instruction. */
    inst->opcode = ELK_SHADER_OPCODE_SEND;
    inst->mlen = mlen;
-   inst->ex_mlen = ex_mlen;
    inst->header_size = header_sz;
    inst->send_has_side_effects = has_side_effects;
    inst->send_is_volatile = !has_side_effects;
@@ -1765,16 +1725,13 @@ lower_surface_block_logical_send(const fs_builder &bld, elk_fs_inst *inst)
    ubld.group(1, 0).MOV(component(header, 2), addr);
 
    elk_fs_reg data;
-   unsigned ex_mlen = 0;
    if (write) {
       const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
       data = retype(bld.move_to_vgrf(src, src_sz), ELK_REGISTER_TYPE_UD);
-      ex_mlen = src_sz * type_sz(src.type) * inst->exec_size / REG_SIZE;
    }
 
    inst->opcode = ELK_SHADER_OPCODE_SEND;
    inst->mlen = 1;
-   inst->ex_mlen = ex_mlen;
    inst->header_size = 1;
    inst->send_has_side_effects = has_side_effects;
    inst->send_is_volatile = !has_side_effects;
@@ -1791,33 +1748,6 @@ lower_surface_block_logical_send(const fs_builder &bld, elk_fs_inst *inst)
    inst->src[3] = data;
 }
 
-static elk_fs_reg
-emit_a64_oword_block_header(const fs_builder &bld, const elk_fs_reg &addr)
-{
-   const fs_builder ubld = bld.exec_all().group(8, 0);
-
-   assert(type_sz(addr.type) == 8 && addr.stride == 0);
-
-   elk_fs_reg expanded_addr = addr;
-   if (addr.file == UNIFORM) {
-      /* We can't do stride 1 with the UNIFORM file, it requires stride 0 */
-      expanded_addr = ubld.vgrf(ELK_REGISTER_TYPE_UQ);
-      expanded_addr.stride = 0;
-      ubld.MOV(expanded_addr, retype(addr, ELK_REGISTER_TYPE_UQ));
-   }
-
-   elk_fs_reg header = ubld.vgrf(ELK_REGISTER_TYPE_UD);
-   ubld.MOV(header, elk_imm_ud(0));
-
-   /* Use a 2-wide MOV to fill out the address */
-   elk_fs_reg addr_vec2 = expanded_addr;
-   addr_vec2.type = ELK_REGISTER_TYPE_UD;
-   addr_vec2.stride = 1;
-   ubld.group(2, 0).MOV(header, addr_vec2);
-
-   return header;
-}
-
 static void
 emit_fragment_mask(const fs_builder &bld, elk_fs_inst *inst)
 {
@@ -1851,44 +1781,21 @@ lower_a64_logical_send(const fs_builder &bld, elk_fs_inst *inst)
    const bool has_side_effects = inst->has_side_effects();
 
-   elk_fs_reg payload, payload2;
-   unsigned mlen, ex_mlen = 0, header_size = 0;
-   if (inst->opcode == ELK_SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL ||
-       inst->opcode == ELK_SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL ||
-       inst->opcode == ELK_SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL) {
-      assert(devinfo->ver >= 9);
+   elk_fs_reg payload;
+   unsigned mlen, header_size = 0;
 
-      /* OWORD messages only take a scalar address in a header */
-      mlen = 1;
-      header_size = 1;
-      payload = emit_a64_oword_block_header(bld, addr);
+   /* Add two because the address is 64-bit */
+   const unsigned dwords = 2 + src_comps;
+   mlen = dwords * (inst->exec_size / 8);
 
-      if (inst->opcode == ELK_SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL) {
-         ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE;
-         payload2 = retype(bld.move_to_vgrf(src, src_comps),
-                           ELK_REGISTER_TYPE_UD);
-      }
-   } else if (devinfo->ver >= 9) {
-      /* On Skylake and above, we have SENDS */
-      mlen = 2 * (inst->exec_size / 8);
-      ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE;
-      payload = retype(bld.move_to_vgrf(addr, 1), ELK_REGISTER_TYPE_UD);
-      payload2 = retype(bld.move_to_vgrf(src, src_comps),
-                        ELK_REGISTER_TYPE_UD);
-   } else {
-      /* Add two because the address is 64-bit */
-      const unsigned dwords = 2 + src_comps;
-      mlen = dwords * (inst->exec_size / 8);
+   elk_fs_reg sources[5];
 
-      elk_fs_reg sources[5];
+   sources[0] = addr;
+   for (unsigned i = 0; i < src_comps; i++)
+      sources[1 + i] = offset(src, bld, i);
 
-      sources[0] = addr;
-      for (unsigned i = 0; i < src_comps; i++)
-         sources[1 + i] = offset(src, bld, i);
+   payload = bld.vgrf(ELK_REGISTER_TYPE_UD, dwords);
+   bld.LOAD_PAYLOAD(payload, sources, 1 + src_comps, 0);
 
-      payload = bld.vgrf(ELK_REGISTER_TYPE_UD, dwords);
-      bld.LOAD_PAYLOAD(payload, sources, 1 + src_comps, 0);
-   }
 
    uint32_t desc;
    switch (inst->opcode) {
@@ -1955,7 +1862,6 @@ lower_a64_logical_send(const fs_builder &bld, elk_fs_inst *inst)
    /* Update the original instruction. */
    inst->opcode = ELK_SHADER_OPCODE_SEND;
    inst->mlen = mlen;
-   inst->ex_mlen = ex_mlen;
    inst->header_size = header_size;
    inst->send_has_side_effects = has_side_effects;
    inst->send_is_volatile = !has_side_effects;
@@ -2212,9 +2118,7 @@ lower_interpolator_logical_send(const fs_builder &bld, elk_fs_inst *inst,
    inst->opcode = ELK_SHADER_OPCODE_SEND;
    inst->sfid = GFX7_SFID_PIXEL_INTERPOLATOR;
    inst->desc = desc_imm;
-   inst->ex_desc = 0;
    inst->mlen = mlen;
-   inst->ex_mlen = 0;
    inst->send_has_side_effects = false;
    inst->send_is_volatile = false;
@@ -2241,8 +2145,6 @@ lower_get_buffer_size(const fs_builder &bld, elk_fs_inst *inst)
    inst->opcode = ELK_SHADER_OPCODE_SEND;
    inst->mlen = inst->exec_size / 8;
    inst->resize_sources(3);
-   inst->ex_mlen = 0;
-   inst->ex_desc = 0;
 
    /* src[0] & src[1] are filled by setup_surface_descriptors() */
    inst->src[2] = lod;