intel/compiler/xe2: Handle new URB write messages
Rework: * idr v1: Fix compilation error. * idr v2: Add support for per-channel offsets. * idr v3: get_lowered_simd_width is 16 on Xe2+. * idr v4: Add disassembly support. Add validation support. * Squashed in changes from Marcin Ślusarz's patches: * "intel/compiler: skip adding 0 to payload address" * "intel/compiler/xe2: drop masking off top 8 bits of URB handle" Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25195>
This commit is contained in:
@@ -2222,41 +2222,95 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa,
|
|||||||
}
|
}
|
||||||
|
|
||||||
case BRW_SFID_URB: {
|
case BRW_SFID_URB: {
|
||||||
unsigned opcode = brw_inst_urb_opcode(devinfo, inst);
|
if (devinfo->ver >= 20) {
|
||||||
|
format(file, " (");
|
||||||
format(file, " offset %"PRIu64, brw_inst_urb_global_offset(devinfo, inst));
|
const enum lsc_opcode op = lsc_msg_desc_opcode(devinfo, imm_desc);
|
||||||
|
err |= control(file, "operation", lsc_operation,
|
||||||
space = 1;
|
op, &space);
|
||||||
|
format(file, ",");
|
||||||
err |= control(file, "urb opcode",
|
err |= control(file, "addr_size", lsc_addr_size,
|
||||||
devinfo->ver >= 7 ? gfx7_urb_opcode
|
lsc_msg_desc_addr_size(devinfo, imm_desc),
|
||||||
: gfx5_urb_opcode,
|
|
||||||
opcode, &space);
|
|
||||||
|
|
||||||
if (devinfo->ver >= 7 &&
|
|
||||||
brw_inst_urb_per_slot_offset(devinfo, inst)) {
|
|
||||||
string(file, " per-slot");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (opcode == GFX8_URB_OPCODE_SIMD8_WRITE ||
|
|
||||||
opcode == GFX8_URB_OPCODE_SIMD8_READ) {
|
|
||||||
if (brw_inst_urb_channel_mask_present(devinfo, inst))
|
|
||||||
string(file, " masked");
|
|
||||||
} else if (opcode != GFX125_URB_OPCODE_FENCE) {
|
|
||||||
err |= control(file, "urb swizzle", urb_swizzle,
|
|
||||||
brw_inst_urb_swizzle_control(devinfo, inst),
|
|
||||||
&space);
|
&space);
|
||||||
}
|
|
||||||
|
|
||||||
if (devinfo->ver < 7) {
|
format(file, ",");
|
||||||
err |= control(file, "urb allocate", urb_allocate,
|
err |= control(file, "data_size", lsc_data_size,
|
||||||
brw_inst_urb_allocate(devinfo, inst), &space);
|
lsc_msg_desc_data_size(devinfo, imm_desc),
|
||||||
err |= control(file, "urb used", urb_used,
|
&space);
|
||||||
brw_inst_urb_used(devinfo, inst), &space);
|
format(file, ",");
|
||||||
}
|
if (lsc_opcode_has_cmask(op)) {
|
||||||
if (devinfo->ver < 8) {
|
err |= control(file, "component_mask",
|
||||||
err |= control(file, "urb complete", urb_complete,
|
lsc_cmask_str,
|
||||||
brw_inst_urb_complete(devinfo, inst), &space);
|
lsc_msg_desc_cmask(devinfo, imm_desc),
|
||||||
|
&space);
|
||||||
|
} else {
|
||||||
|
err |= control(file, "vector_size",
|
||||||
|
lsc_vect_size_str,
|
||||||
|
lsc_msg_desc_vect_size(devinfo, imm_desc),
|
||||||
|
&space);
|
||||||
|
if (lsc_msg_desc_transpose(devinfo, imm_desc))
|
||||||
|
format(file, ", transpose");
|
||||||
|
}
|
||||||
|
switch(op) {
|
||||||
|
case LSC_OP_LOAD_CMASK:
|
||||||
|
case LSC_OP_LOAD:
|
||||||
|
format(file, ",");
|
||||||
|
err |= control(file, "cache_load",
|
||||||
|
lsc_cache_load,
|
||||||
|
lsc_msg_desc_cache_ctrl(devinfo, imm_desc),
|
||||||
|
&space);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
format(file, ",");
|
||||||
|
err |= control(file, "cache_store",
|
||||||
|
lsc_cache_store,
|
||||||
|
lsc_msg_desc_cache_ctrl(devinfo, imm_desc),
|
||||||
|
&space);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
format(file, " dst_len = %u,", lsc_msg_desc_dest_len(devinfo, imm_desc));
|
||||||
|
format(file, " src0_len = %u,", lsc_msg_desc_src0_len(devinfo, imm_desc));
|
||||||
|
format(file, " src1_len = %d", brw_message_ex_desc_ex_mlen(devinfo, imm_ex_desc));
|
||||||
|
err |= control(file, "address_type", lsc_addr_surface_type,
|
||||||
|
lsc_msg_desc_addr_type(devinfo, imm_desc), &space);
|
||||||
|
format(file, " )");
|
||||||
|
} else {
|
||||||
|
unsigned urb_opcode = brw_inst_urb_opcode(devinfo, inst);
|
||||||
|
|
||||||
|
format(file, " offset %"PRIu64, brw_inst_urb_global_offset(devinfo, inst));
|
||||||
|
|
||||||
|
space = 1;
|
||||||
|
|
||||||
|
err |= control(file, "urb opcode",
|
||||||
|
devinfo->ver >= 7 ? gfx7_urb_opcode
|
||||||
|
: gfx5_urb_opcode,
|
||||||
|
urb_opcode, &space);
|
||||||
|
|
||||||
|
if (devinfo->ver >= 7 &&
|
||||||
|
brw_inst_urb_per_slot_offset(devinfo, inst)) {
|
||||||
|
string(file, " per-slot");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (urb_opcode == GFX8_URB_OPCODE_SIMD8_WRITE ||
|
||||||
|
urb_opcode == GFX8_URB_OPCODE_SIMD8_READ) {
|
||||||
|
if (brw_inst_urb_channel_mask_present(devinfo, inst))
|
||||||
|
string(file, " masked");
|
||||||
|
} else if (urb_opcode != GFX125_URB_OPCODE_FENCE) {
|
||||||
|
err |= control(file, "urb swizzle", urb_swizzle,
|
||||||
|
brw_inst_urb_swizzle_control(devinfo, inst),
|
||||||
|
&space);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (devinfo->ver < 7) {
|
||||||
|
err |= control(file, "urb allocate", urb_allocate,
|
||||||
|
brw_inst_urb_allocate(devinfo, inst), &space);
|
||||||
|
err |= control(file, "urb used", urb_used,
|
||||||
|
brw_inst_urb_used(devinfo, inst), &space);
|
||||||
|
}
|
||||||
|
if (devinfo->ver < 8) {
|
||||||
|
err |= control(file, "urb complete", urb_complete,
|
||||||
|
brw_inst_urb_complete(devinfo, inst), &space);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@@ -2485,6 +2485,10 @@ send_descriptor_restrictions(const struct brw_isa_info *isa,
|
|||||||
const uint32_t desc = brw_inst_send_desc(devinfo, inst);
|
const uint32_t desc = brw_inst_send_desc(devinfo, inst);
|
||||||
|
|
||||||
switch (brw_inst_sfid(devinfo, inst)) {
|
switch (brw_inst_sfid(devinfo, inst)) {
|
||||||
|
case BRW_SFID_URB:
|
||||||
|
if (devinfo->ver < 20)
|
||||||
|
break;
|
||||||
|
FALLTHROUGH;
|
||||||
case GFX12_SFID_TGM:
|
case GFX12_SFID_TGM:
|
||||||
case GFX12_SFID_SLM:
|
case GFX12_SFID_SLM:
|
||||||
case GFX12_SFID_UGM:
|
case GFX12_SFID_UGM:
|
||||||
@@ -2500,7 +2504,7 @@ send_descriptor_restrictions(const struct brw_isa_info *isa,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (brw_inst_sfid(devinfo, inst) == BRW_SFID_URB) {
|
if (brw_inst_sfid(devinfo, inst) == BRW_SFID_URB && devinfo->ver < 20) {
|
||||||
/* Gfx4 doesn't have a "header present" bit in the SEND message. */
|
/* Gfx4 doesn't have a "header present" bit in the SEND message. */
|
||||||
ERROR_IF(devinfo->ver > 4 && !brw_inst_header_present(devinfo, inst),
|
ERROR_IF(devinfo->ver > 4 && !brw_inst_header_present(devinfo, inst),
|
||||||
"Header must be present for all URB messages.");
|
"Header must be present for all URB messages.");
|
||||||
|
@@ -5391,7 +5391,7 @@ get_lowered_simd_width(const struct brw_compiler *compiler,
|
|||||||
|
|
||||||
case SHADER_OPCODE_URB_READ_LOGICAL:
|
case SHADER_OPCODE_URB_READ_LOGICAL:
|
||||||
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
||||||
return MIN2(8, inst->exec_size);
|
return MIN2(devinfo->ver < 20 ? 8 : 16, inst->exec_size);
|
||||||
|
|
||||||
case SHADER_OPCODE_QUAD_SWIZZLE: {
|
case SHADER_OPCODE_QUAD_SWIZZLE: {
|
||||||
const unsigned swiz = inst->src[1].ud;
|
const unsigned swiz = inst->src[1].ud;
|
||||||
|
@@ -266,6 +266,142 @@ lower_urb_write_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
inst->src[3] = brw_null_reg();
|
inst->src[3] = brw_null_reg();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
|
||||||
|
{
|
||||||
|
/* FINISHME: This is not yet implemented. */
|
||||||
|
assert(inst->src[URB_LOGICAL_SRC_CHANNEL_MASK].file == BAD_FILE);
|
||||||
|
|
||||||
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
assert(devinfo->has_lsc);
|
||||||
|
|
||||||
|
/* Get the logical send arguments. */
|
||||||
|
const fs_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
|
||||||
|
const fs_reg src = inst->src[URB_LOGICAL_SRC_DATA];
|
||||||
|
|
||||||
|
/* Calculate the total number of components of the payload. */
|
||||||
|
const unsigned src_comps = inst->components_read(URB_LOGICAL_SRC_DATA);
|
||||||
|
const unsigned src_sz = type_sz(src.type);
|
||||||
|
|
||||||
|
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
|
||||||
|
bld.MOV(payload, handle);
|
||||||
|
|
||||||
|
/* The low 24-bits of the URB handle is a byte offset into the URB area.
|
||||||
|
* Add the offset of the write to this value.
|
||||||
|
*
|
||||||
|
* FINISHME: What units is inst->offset? vec4, right?
|
||||||
|
*/
|
||||||
|
if (inst->offset) {
|
||||||
|
bld.ADD(payload, payload, brw_imm_ud(inst->offset * 16));
|
||||||
|
inst->offset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS].file != BAD_FILE) {
|
||||||
|
fs_reg offsets = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
|
||||||
|
fs_reg payload1 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
fs_reg payload2 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
|
||||||
|
for (unsigned i = 1; i < src_comps; i++) {
|
||||||
|
bld.MOV(payload2, offset(src, bld, i - 1));
|
||||||
|
bld.ADD(payload1, payload, offset(offsets, bld, i - 1));
|
||||||
|
|
||||||
|
fs_inst *send = bld.emit(SHADER_OPCODE_SEND,
|
||||||
|
bld.null_reg_ud(),
|
||||||
|
brw_imm_ud(0) /* desc */,
|
||||||
|
brw_imm_ud(0) /* ex_desc */,
|
||||||
|
brw_vec8_grf(0, 0) /* payload */);
|
||||||
|
|
||||||
|
const unsigned ex_mlen = (src_sz * inst->exec_size) / REG_SIZE;
|
||||||
|
|
||||||
|
send->sfid = BRW_SFID_URB;
|
||||||
|
|
||||||
|
send->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, send->exec_size,
|
||||||
|
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32,
|
||||||
|
1 /* num_coordinates */,
|
||||||
|
LSC_DATA_SIZE_D32, 1 /* num_channels */,
|
||||||
|
false /* transpose */,
|
||||||
|
LSC_CACHE_STORE_L1UC_L3UC,
|
||||||
|
false /* has_dest */);
|
||||||
|
|
||||||
|
send->mlen = lsc_msg_desc_src0_len(devinfo, send->desc);
|
||||||
|
send->ex_mlen = ex_mlen;
|
||||||
|
send->header_size = 0;
|
||||||
|
send->send_has_side_effects = true;
|
||||||
|
send->send_is_volatile = false;
|
||||||
|
|
||||||
|
send->resize_sources(4);
|
||||||
|
|
||||||
|
send->src[0] = brw_imm_ud(0);
|
||||||
|
send->src[1] = brw_imm_ud(0);
|
||||||
|
|
||||||
|
send->src[2] = payload1;
|
||||||
|
send->src[3] = payload2;
|
||||||
|
}
|
||||||
|
|
||||||
|
bld.MOV(payload2, offset(src, bld, src_comps - 1));
|
||||||
|
bld.ADD(payload1, payload, offset(offsets, bld, src_comps - 1));
|
||||||
|
const unsigned ex_mlen = (src_sz * inst->exec_size) / REG_SIZE;
|
||||||
|
|
||||||
|
inst->sfid = BRW_SFID_URB;
|
||||||
|
|
||||||
|
inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size,
|
||||||
|
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32,
|
||||||
|
1 /* num_coordinates */,
|
||||||
|
LSC_DATA_SIZE_D32, 1 /* num_channels */,
|
||||||
|
false /* transpose */,
|
||||||
|
LSC_CACHE_STORE_L1UC_L3UC,
|
||||||
|
false /* has_dest */);
|
||||||
|
|
||||||
|
|
||||||
|
/* Update the original instruction. */
|
||||||
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
|
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
|
||||||
|
inst->ex_mlen = ex_mlen;
|
||||||
|
inst->header_size = 0;
|
||||||
|
inst->send_has_side_effects = true;
|
||||||
|
inst->send_is_volatile = false;
|
||||||
|
|
||||||
|
inst->resize_sources(4);
|
||||||
|
|
||||||
|
inst->src[0] = brw_imm_ud(0);
|
||||||
|
inst->src[1] = brw_imm_ud(0);
|
||||||
|
|
||||||
|
inst->src[2] = payload1;
|
||||||
|
inst->src[3] = payload2;
|
||||||
|
} else {
|
||||||
|
fs_reg payload2 = bld.move_to_vgrf(src, src_comps);
|
||||||
|
const unsigned ex_mlen = (src_comps * src_sz * inst->exec_size) / REG_SIZE;
|
||||||
|
|
||||||
|
inst->sfid = BRW_SFID_URB;
|
||||||
|
|
||||||
|
inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size,
|
||||||
|
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32,
|
||||||
|
1 /* num_coordinates */,
|
||||||
|
LSC_DATA_SIZE_D32, src_comps /* num_channels */,
|
||||||
|
false /* transpose */,
|
||||||
|
LSC_CACHE_STORE_L1UC_L3UC,
|
||||||
|
false /* has_dest */);
|
||||||
|
|
||||||
|
|
||||||
|
/* Update the original instruction. */
|
||||||
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
|
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
|
||||||
|
inst->ex_mlen = ex_mlen;
|
||||||
|
inst->header_size = 0;
|
||||||
|
inst->send_has_side_effects = true;
|
||||||
|
inst->send_is_volatile = false;
|
||||||
|
|
||||||
|
inst->resize_sources(4);
|
||||||
|
|
||||||
|
inst->src[0] = brw_imm_ud(0);
|
||||||
|
inst->src[1] = brw_imm_ud(0);
|
||||||
|
|
||||||
|
inst->src[2] = payload;
|
||||||
|
inst->src[3] = payload2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
|
setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
|
||||||
fs_reg *dst, fs_reg color, unsigned components)
|
fs_reg *dst, fs_reg color, unsigned components)
|
||||||
@@ -3201,7 +3337,11 @@ fs_visitor::lower_logical_sends()
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
||||||
lower_urb_write_logical_send(ibld, inst);
|
if (devinfo->ver < 20)
|
||||||
|
lower_urb_write_logical_send(ibld, inst);
|
||||||
|
else
|
||||||
|
lower_urb_write_logical_send_xe2(ibld, inst);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
Reference in New Issue
Block a user