intel/fs: Add A64 OWORD BLOCK opcodes
Based on a patch for OWORD BLOCK READ from Jason Ekstrand.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7448>
This commit is contained in:

committed by
Marge Bot

parent
eb03f29655
commit
d3d2b73fa3
@@ -443,6 +443,8 @@ static const char *const dp_dc1_msg_type_hsw[32] = {
|
||||
[GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ] = "DC A64 scattered read",
|
||||
[GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ] = "DC A64 untyped surface read",
|
||||
[GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP] = "DC A64 untyped atomic op",
|
||||
[GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ] = "DC A64 oword block read",
|
||||
[GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE] = "DC A64 oword block write",
|
||||
[GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE] = "DC A64 untyped surface write",
|
||||
[GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE] = "DC A64 scattered write",
|
||||
[GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP] =
|
||||
|
@@ -806,6 +806,27 @@ brw_dp_a64_untyped_surface_rw_desc(const struct gen_device_info *devinfo,
|
||||
msg_type, msg_control);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_dp_a64_oword_block_rw_desc(const struct gen_device_info *devinfo,
|
||||
bool align_16B,
|
||||
unsigned num_dwords,
|
||||
bool write)
|
||||
{
|
||||
/* Writes can only have addresses aligned by OWORDs (16 Bytes). */
|
||||
assert(!write || align_16B);
|
||||
|
||||
unsigned msg_type =
|
||||
write ? GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
|
||||
GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;
|
||||
|
||||
unsigned msg_control =
|
||||
SET_BITS(!align_16B, 4, 3) |
|
||||
SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);
|
||||
|
||||
return brw_dp_desc(devinfo, GEN8_BTI_STATELESS_NON_COHERENT,
|
||||
msg_type, msg_control);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
|
||||
* Skylake PRM).
|
||||
|
@@ -427,6 +427,9 @@ enum opcode {
|
||||
SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL,
|
||||
SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL,
|
||||
SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL,
|
||||
SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL,
|
||||
SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
|
||||
SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL,
|
||||
SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
|
||||
SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL,
|
||||
SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
||||
@@ -1410,6 +1413,8 @@ enum brw_message_target {
|
||||
#define GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ 0x10
|
||||
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ 0x11
|
||||
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP 0x12
|
||||
#define GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ 0x14
|
||||
#define GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE 0x15
|
||||
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE 0x19
|
||||
#define GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE 0x1a
|
||||
#define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP 0x1b
|
||||
|
@@ -824,9 +824,21 @@ fs_inst::components_read(unsigned i) const
|
||||
return 1;
|
||||
|
||||
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
|
||||
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
|
||||
assert(src[2].file == IMM);
|
||||
return 1;
|
||||
|
||||
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
|
||||
assert(src[2].file == IMM);
|
||||
if (i == 1) { /* data to write */
|
||||
const unsigned comps = src[2].ud / exec_size;
|
||||
assert(comps > 0);
|
||||
return comps;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
|
||||
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
||||
assert(src[2].file == IMM);
|
||||
return i == 1 ? src[2].ud : 1;
|
||||
@@ -5626,6 +5638,23 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
inst->resize_sources(4);
|
||||
}
|
||||
|
||||
/**
 * Emit the message header used by the A64 OWORD block messages.
 *
 * A UD VGRF is allocated from an 8-wide exec_all builder and zeroed, then
 * the 64-bit address \p addr is copied into its first two dword channels.
 * \p addr must have an 8-byte type and a stride of 0 (asserted).
 *
 * \return the register holding the header.
 */
static fs_reg
emit_a64_oword_block_header(const fs_builder &bld, const fs_reg &addr)
{
   const fs_builder hdr_bld = bld.exec_all().group(8, 0);

   /* Start from an all-zero header. */
   fs_reg hdr = hdr_bld.vgrf(BRW_REGISTER_TYPE_UD);
   hdr_bld.MOV(hdr, brw_imm_ud(0));

   assert(type_sz(addr.type) == 8 && addr.stride == 0);

   /* Reinterpret the 64-bit address as two consecutive dwords so that a
    * single 2-wide MOV can write it into the header.
    */
   fs_reg addr_dwords = addr;
   addr_dwords.stride = 1;
   addr_dwords.type = BRW_REGISTER_TYPE_UD;
   hdr_bld.group(2, 0).MOV(hdr, addr_dwords);

   return hdr;
}
|
||||
|
||||
static void
|
||||
lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
{
|
||||
@@ -5645,8 +5674,23 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
emit_predicate_on_sample_mask(bld, inst);
|
||||
|
||||
fs_reg payload, payload2;
|
||||
unsigned mlen, ex_mlen = 0;
|
||||
if (devinfo->gen >= 9) {
|
||||
unsigned mlen, ex_mlen = 0, header_size = 0;
|
||||
if (inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL ||
|
||||
inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL ||
|
||||
inst->opcode == SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL) {
|
||||
assert(devinfo->gen >= 9);
|
||||
|
||||
/* OWORD messages only take a scalar address in a header */
|
||||
mlen = 1;
|
||||
header_size = 1;
|
||||
payload = emit_a64_oword_block_header(bld, addr);
|
||||
|
||||
if (inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL) {
|
||||
ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE;
|
||||
payload2 = retype(bld.move_to_vgrf(src, src_comps),
|
||||
BRW_REGISTER_TYPE_UD);
|
||||
}
|
||||
} else if (devinfo->gen >= 9) {
|
||||
/* On Skylake and above, we have SENDS */
|
||||
mlen = 2 * (inst->exec_size / 8);
|
||||
ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE;
|
||||
@@ -5683,6 +5727,27 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
true /* write */);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
|
||||
desc = brw_dp_a64_oword_block_rw_desc(devinfo,
|
||||
true, /* align_16B */
|
||||
arg, /* num_dwords */
|
||||
false /* write */);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
|
||||
desc = brw_dp_a64_oword_block_rw_desc(devinfo,
|
||||
false, /* align_16B */
|
||||
arg, /* num_dwords */
|
||||
false /* write */);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
|
||||
desc = brw_dp_a64_oword_block_rw_desc(devinfo,
|
||||
true, /* align_16B */
|
||||
arg, /* num_dwords */
|
||||
true /* write */);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
|
||||
desc = brw_dp_a64_byte_scattered_rw_desc(devinfo, inst->exec_size,
|
||||
arg, /* bit_size */
|
||||
@@ -5722,7 +5787,7 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->mlen = mlen;
|
||||
inst->ex_mlen = ex_mlen;
|
||||
inst->header_size = 0;
|
||||
inst->header_size = header_size;
|
||||
inst->send_has_side_effects = has_side_effects;
|
||||
inst->send_is_volatile = !has_side_effects;
|
||||
|
||||
@@ -5956,6 +6021,9 @@ fs_visitor::lower_logical_sends()
|
||||
|
||||
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
|
||||
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
|
||||
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||
@@ -6557,6 +6625,12 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
|
||||
return devinfo->gen <= 8 ? 8 : MIN2(16, inst->exec_size);
|
||||
|
||||
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
|
||||
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
|
||||
assert(inst->exec_size <= 16);
|
||||
return inst->exec_size;
|
||||
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||
|
@@ -501,6 +501,8 @@ schedule_node::set_latency_gen7(bool is_haswell)
|
||||
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ:
|
||||
case GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE:
|
||||
case GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ:
|
||||
case GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ:
|
||||
case GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE:
|
||||
/* See also GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ */
|
||||
latency = 300;
|
||||
break;
|
||||
|
@@ -303,6 +303,12 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
||||
return "untyped_surface_write_logical";
|
||||
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
|
||||
return "a64_untyped_read_logical";
|
||||
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
|
||||
return "a64_oword_block_read_logical";
|
||||
case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
|
||||
return "a64_unaligned_oword_block_read_logical";
|
||||
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
|
||||
return "a64_oword_block_write_logical";
|
||||
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
||||
return "a64_untyped_write_logical";
|
||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
|
||||
@@ -1088,6 +1094,7 @@ backend_instruction::has_side_effects() const
|
||||
case SHADER_OPCODE_RND_MODE:
|
||||
case SHADER_OPCODE_FLOAT_CONTROL_MODE:
|
||||
case FS_OPCODE_SCHEDULING_FENCE:
|
||||
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
|
||||
return true;
|
||||
default:
|
||||
return eot;
|
||||
|
Reference in New Issue
Block a user