intel/compiler/xe2: Handle new URB write messages

Rework:
 * idr v1: Fix compilation error.
 * idr v2: Add support for per-channel offsets.
 * idr v3: get_lowered_simd_width is 16 on Xe2+.
 * idr v4: Add disassembly support.  Add validation support.
 * Squashed in changes from Marcin Ślusarz's patches:
   * "intel/compiler: skip adding 0 to payload address"
   * "intel/compiler/xe2: drop masking off top 8 bits of URB handle"

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25195>
This commit is contained in:
Ian Romanick
2022-07-20 10:21:21 -07:00
committed by Marge Bot
parent fa53a7d241
commit feec9166cd
4 changed files with 234 additions and 36 deletions

View File

@@ -2222,41 +2222,95 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa,
} }
case BRW_SFID_URB: { case BRW_SFID_URB: {
unsigned opcode = brw_inst_urb_opcode(devinfo, inst); if (devinfo->ver >= 20) {
format(file, " (");
format(file, " offset %"PRIu64, brw_inst_urb_global_offset(devinfo, inst)); const enum lsc_opcode op = lsc_msg_desc_opcode(devinfo, imm_desc);
err |= control(file, "operation", lsc_operation,
space = 1; op, &space);
format(file, ",");
err |= control(file, "urb opcode", err |= control(file, "addr_size", lsc_addr_size,
devinfo->ver >= 7 ? gfx7_urb_opcode lsc_msg_desc_addr_size(devinfo, imm_desc),
: gfx5_urb_opcode,
opcode, &space);
if (devinfo->ver >= 7 &&
brw_inst_urb_per_slot_offset(devinfo, inst)) {
string(file, " per-slot");
}
if (opcode == GFX8_URB_OPCODE_SIMD8_WRITE ||
opcode == GFX8_URB_OPCODE_SIMD8_READ) {
if (brw_inst_urb_channel_mask_present(devinfo, inst))
string(file, " masked");
} else if (opcode != GFX125_URB_OPCODE_FENCE) {
err |= control(file, "urb swizzle", urb_swizzle,
brw_inst_urb_swizzle_control(devinfo, inst),
&space); &space);
}
if (devinfo->ver < 7) { format(file, ",");
err |= control(file, "urb allocate", urb_allocate, err |= control(file, "data_size", lsc_data_size,
brw_inst_urb_allocate(devinfo, inst), &space); lsc_msg_desc_data_size(devinfo, imm_desc),
err |= control(file, "urb used", urb_used, &space);
brw_inst_urb_used(devinfo, inst), &space); format(file, ",");
} if (lsc_opcode_has_cmask(op)) {
if (devinfo->ver < 8) { err |= control(file, "component_mask",
err |= control(file, "urb complete", urb_complete, lsc_cmask_str,
brw_inst_urb_complete(devinfo, inst), &space); lsc_msg_desc_cmask(devinfo, imm_desc),
&space);
} else {
err |= control(file, "vector_size",
lsc_vect_size_str,
lsc_msg_desc_vect_size(devinfo, imm_desc),
&space);
if (lsc_msg_desc_transpose(devinfo, imm_desc))
format(file, ", transpose");
}
switch(op) {
case LSC_OP_LOAD_CMASK:
case LSC_OP_LOAD:
format(file, ",");
err |= control(file, "cache_load",
lsc_cache_load,
lsc_msg_desc_cache_ctrl(devinfo, imm_desc),
&space);
break;
default:
format(file, ",");
err |= control(file, "cache_store",
lsc_cache_store,
lsc_msg_desc_cache_ctrl(devinfo, imm_desc),
&space);
break;
}
format(file, " dst_len = %u,", lsc_msg_desc_dest_len(devinfo, imm_desc));
format(file, " src0_len = %u,", lsc_msg_desc_src0_len(devinfo, imm_desc));
format(file, " src1_len = %d", brw_message_ex_desc_ex_mlen(devinfo, imm_ex_desc));
err |= control(file, "address_type", lsc_addr_surface_type,
lsc_msg_desc_addr_type(devinfo, imm_desc), &space);
format(file, " )");
} else {
unsigned urb_opcode = brw_inst_urb_opcode(devinfo, inst);
format(file, " offset %"PRIu64, brw_inst_urb_global_offset(devinfo, inst));
space = 1;
err |= control(file, "urb opcode",
devinfo->ver >= 7 ? gfx7_urb_opcode
: gfx5_urb_opcode,
urb_opcode, &space);
if (devinfo->ver >= 7 &&
brw_inst_urb_per_slot_offset(devinfo, inst)) {
string(file, " per-slot");
}
if (urb_opcode == GFX8_URB_OPCODE_SIMD8_WRITE ||
urb_opcode == GFX8_URB_OPCODE_SIMD8_READ) {
if (brw_inst_urb_channel_mask_present(devinfo, inst))
string(file, " masked");
} else if (urb_opcode != GFX125_URB_OPCODE_FENCE) {
err |= control(file, "urb swizzle", urb_swizzle,
brw_inst_urb_swizzle_control(devinfo, inst),
&space);
}
if (devinfo->ver < 7) {
err |= control(file, "urb allocate", urb_allocate,
brw_inst_urb_allocate(devinfo, inst), &space);
err |= control(file, "urb used", urb_used,
brw_inst_urb_used(devinfo, inst), &space);
}
if (devinfo->ver < 8) {
err |= control(file, "urb complete", urb_complete,
brw_inst_urb_complete(devinfo, inst), &space);
}
} }
break; break;
} }

View File

@@ -2485,6 +2485,10 @@ send_descriptor_restrictions(const struct brw_isa_info *isa,
const uint32_t desc = brw_inst_send_desc(devinfo, inst); const uint32_t desc = brw_inst_send_desc(devinfo, inst);
switch (brw_inst_sfid(devinfo, inst)) { switch (brw_inst_sfid(devinfo, inst)) {
case BRW_SFID_URB:
if (devinfo->ver < 20)
break;
FALLTHROUGH;
case GFX12_SFID_TGM: case GFX12_SFID_TGM:
case GFX12_SFID_SLM: case GFX12_SFID_SLM:
case GFX12_SFID_UGM: case GFX12_SFID_UGM:
@@ -2500,7 +2504,7 @@ send_descriptor_restrictions(const struct brw_isa_info *isa,
break; break;
} }
if (brw_inst_sfid(devinfo, inst) == BRW_SFID_URB) { if (brw_inst_sfid(devinfo, inst) == BRW_SFID_URB && devinfo->ver < 20) {
/* Gfx4 doesn't have a "header present" bit in the SEND message. */ /* Gfx4 doesn't have a "header present" bit in the SEND message. */
ERROR_IF(devinfo->ver > 4 && !brw_inst_header_present(devinfo, inst), ERROR_IF(devinfo->ver > 4 && !brw_inst_header_present(devinfo, inst),
"Header must be present for all URB messages."); "Header must be present for all URB messages.");

View File

@@ -5391,7 +5391,7 @@ get_lowered_simd_width(const struct brw_compiler *compiler,
case SHADER_OPCODE_URB_READ_LOGICAL: case SHADER_OPCODE_URB_READ_LOGICAL:
case SHADER_OPCODE_URB_WRITE_LOGICAL: case SHADER_OPCODE_URB_WRITE_LOGICAL:
return MIN2(8, inst->exec_size); return MIN2(devinfo->ver < 20 ? 8 : 16, inst->exec_size);
case SHADER_OPCODE_QUAD_SWIZZLE: { case SHADER_OPCODE_QUAD_SWIZZLE: {
const unsigned swiz = inst->src[1].ud; const unsigned swiz = inst->src[1].ud;

View File

@@ -266,6 +266,142 @@ lower_urb_write_logical_send(const fs_builder &bld, fs_inst *inst)
inst->src[3] = brw_null_reg(); inst->src[3] = brw_null_reg();
} }
static void
lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
{
/* FINISHME: This is not yet implemented. */
assert(inst->src[URB_LOGICAL_SRC_CHANNEL_MASK].file == BAD_FILE);
const intel_device_info *devinfo = bld.shader->devinfo;
assert(devinfo->has_lsc);
/* Get the logical send arguments. */
const fs_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
const fs_reg src = inst->src[URB_LOGICAL_SRC_DATA];
/* Calculate the total number of components of the payload. */
const unsigned src_comps = inst->components_read(URB_LOGICAL_SRC_DATA);
const unsigned src_sz = type_sz(src.type);
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.MOV(payload, handle);
/* The low 24-bits of the URB handle is a byte offset into the URB area.
* Add the offset of the write to this value.
*
* FINISHME: What units is inst->offset? vec4, right?
*/
if (inst->offset) {
bld.ADD(payload, payload, brw_imm_ud(inst->offset * 16));
inst->offset = 0;
}
if (inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS].file != BAD_FILE) {
fs_reg offsets = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
fs_reg payload1 = bld.vgrf(BRW_REGISTER_TYPE_UD);
fs_reg payload2 = bld.vgrf(BRW_REGISTER_TYPE_UD);
for (unsigned i = 1; i < src_comps; i++) {
bld.MOV(payload2, offset(src, bld, i - 1));
bld.ADD(payload1, payload, offset(offsets, bld, i - 1));
fs_inst *send = bld.emit(SHADER_OPCODE_SEND,
bld.null_reg_ud(),
brw_imm_ud(0) /* desc */,
brw_imm_ud(0) /* ex_desc */,
brw_vec8_grf(0, 0) /* payload */);
const unsigned ex_mlen = (src_sz * inst->exec_size) / REG_SIZE;
send->sfid = BRW_SFID_URB;
send->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, send->exec_size,
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32,
1 /* num_coordinates */,
LSC_DATA_SIZE_D32, 1 /* num_channels */,
false /* transpose */,
LSC_CACHE_STORE_L1UC_L3UC,
false /* has_dest */);
send->mlen = lsc_msg_desc_src0_len(devinfo, send->desc);
send->ex_mlen = ex_mlen;
send->header_size = 0;
send->send_has_side_effects = true;
send->send_is_volatile = false;
send->resize_sources(4);
send->src[0] = brw_imm_ud(0);
send->src[1] = brw_imm_ud(0);
send->src[2] = payload1;
send->src[3] = payload2;
}
bld.MOV(payload2, offset(src, bld, src_comps - 1));
bld.ADD(payload1, payload, offset(offsets, bld, src_comps - 1));
const unsigned ex_mlen = (src_sz * inst->exec_size) / REG_SIZE;
inst->sfid = BRW_SFID_URB;
inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size,
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32,
1 /* num_coordinates */,
LSC_DATA_SIZE_D32, 1 /* num_channels */,
false /* transpose */,
LSC_CACHE_STORE_L1UC_L3UC,
false /* has_dest */);
/* Update the original instruction. */
inst->opcode = SHADER_OPCODE_SEND;
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
inst->ex_mlen = ex_mlen;
inst->header_size = 0;
inst->send_has_side_effects = true;
inst->send_is_volatile = false;
inst->resize_sources(4);
inst->src[0] = brw_imm_ud(0);
inst->src[1] = brw_imm_ud(0);
inst->src[2] = payload1;
inst->src[3] = payload2;
} else {
fs_reg payload2 = bld.move_to_vgrf(src, src_comps);
const unsigned ex_mlen = (src_comps * src_sz * inst->exec_size) / REG_SIZE;
inst->sfid = BRW_SFID_URB;
inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size,
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32,
1 /* num_coordinates */,
LSC_DATA_SIZE_D32, src_comps /* num_channels */,
false /* transpose */,
LSC_CACHE_STORE_L1UC_L3UC,
false /* has_dest */);
/* Update the original instruction. */
inst->opcode = SHADER_OPCODE_SEND;
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
inst->ex_mlen = ex_mlen;
inst->header_size = 0;
inst->send_has_side_effects = true;
inst->send_is_volatile = false;
inst->resize_sources(4);
inst->src[0] = brw_imm_ud(0);
inst->src[1] = brw_imm_ud(0);
inst->src[2] = payload;
inst->src[3] = payload2;
}
}
static void static void
setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key, setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
fs_reg *dst, fs_reg color, unsigned components) fs_reg *dst, fs_reg color, unsigned components)
@@ -3201,7 +3337,11 @@ fs_visitor::lower_logical_sends()
break; break;
case SHADER_OPCODE_URB_WRITE_LOGICAL: case SHADER_OPCODE_URB_WRITE_LOGICAL:
lower_urb_write_logical_send(ibld, inst); if (devinfo->ver < 20)
lower_urb_write_logical_send(ibld, inst);
else
lower_urb_write_logical_send_xe2(ibld, inst);
break; break;
default: default: