intel/fs: Add _LOGICAL versions of URB messages

The lowering is currently fake.  It just changes the opcode from the
_LOGICAL version to the non-_LOGICAL version.

v2: Remove some rebase cruft.  Apply 's/gfx8_//;s/simd8_//' to the names in
brw_instruction_name.  Both suggested by Ken.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17379>
This commit is contained in:
Ian Romanick
2022-06-27 15:22:03 -07:00
committed by Marge Bot
parent 07b9bfacc7
commit a477587b4a
7 changed files with 114 additions and 41 deletions

View File

@@ -469,6 +469,14 @@ enum opcode {
/** /**
* Gfx8+ SIMD8 URB Read messages. * Gfx8+ SIMD8 URB Read messages.
*/ */
SHADER_OPCODE_URB_READ_LOGICAL,
SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL,
SHADER_OPCODE_URB_WRITE_LOGICAL,
SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL,
SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL,
SHADER_OPCODE_URB_READ_SIMD8, SHADER_OPCODE_URB_READ_SIMD8,
SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT,

View File

@@ -909,6 +909,12 @@ fs_inst::size_read(int arg) const
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_SIMD8: case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_LOGICAL:
case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
case SHADER_OPCODE_URB_READ_LOGICAL:
case SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
if (arg == 0) if (arg == 0)
@@ -1542,10 +1548,10 @@ fs_visitor::emit_gs_thread_end()
if (gs_prog_data->static_vertex_count != -1) { if (gs_prog_data->static_vertex_count != -1) {
foreach_in_list_reverse(fs_inst, prev, &this->instructions) { foreach_in_list_reverse(fs_inst, prev, &this->instructions) {
if (prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8 || if (prev->opcode == SHADER_OPCODE_URB_WRITE_LOGICAL ||
prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED || prev->opcode == SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL ||
prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT || prev->opcode == SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL ||
prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT) { prev->opcode == SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL) {
prev->eot = true; prev->eot = true;
/* Delete now dead instructions. */ /* Delete now dead instructions. */
@@ -1561,7 +1567,7 @@ fs_visitor::emit_gs_thread_end()
} }
fs_reg hdr = abld.vgrf(BRW_REGISTER_TYPE_UD, 1); fs_reg hdr = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
abld.MOV(hdr, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD))); abld.MOV(hdr, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)));
inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, hdr); inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, hdr);
inst->mlen = 1; inst->mlen = 1;
} else { } else {
fs_reg payload = abld.vgrf(BRW_REGISTER_TYPE_UD, 2); fs_reg payload = abld.vgrf(BRW_REGISTER_TYPE_UD, 2);
@@ -1569,7 +1575,7 @@ fs_visitor::emit_gs_thread_end()
sources[0] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); sources[0] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
sources[1] = this->final_gs_vertex_count; sources[1] = this->final_gs_vertex_count;
abld.LOAD_PAYLOAD(payload, sources, 2, 2); abld.LOAD_PAYLOAD(payload, sources, 2, 2);
inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, payload);
inst->mlen = 2; inst->mlen = 2;
} }
inst->eot = true; inst->eot = true;
@@ -5083,6 +5089,12 @@ get_lowered_simd_width(const struct brw_compiler *compiler,
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_LOGICAL:
case SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL:
case SHADER_OPCODE_URB_WRITE_LOGICAL:
case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
return MIN2(8, inst->exec_size); return MIN2(8, inst->exec_size);
case SHADER_OPCODE_QUAD_SWIZZLE: { case SHADER_OPCODE_QUAD_SWIZZLE: {
@@ -6697,7 +6709,7 @@ fs_visitor::run_tcs()
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 3); fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 3);
bld.LOAD_PAYLOAD(payload, srcs, 3, 2); bld.LOAD_PAYLOAD(payload, srcs, 3, 2);
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8_MASKED, fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
bld.null_reg_ud(), payload); bld.null_reg_ud(), payload);
inst->mlen = 3; inst->mlen = 3;
inst->eot = true; inst->eot = true;

View File

@@ -2285,17 +2285,17 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
* Similarly, if the control data header is <= 32 bits, there is only one * Similarly, if the control data header is <= 32 bits, there is only one
* DWord, so we can skip channel masks. * DWord, so we can skip channel masks.
*/ */
enum opcode opcode = SHADER_OPCODE_URB_WRITE_SIMD8; enum opcode opcode = SHADER_OPCODE_URB_WRITE_LOGICAL;
fs_reg channel_mask, per_slot_offset; fs_reg channel_mask, per_slot_offset;
if (gs_compile->control_data_header_size_bits > 32) { if (gs_compile->control_data_header_size_bits > 32) {
opcode = SHADER_OPCODE_URB_WRITE_SIMD8_MASKED; opcode = SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL;
channel_mask = vgrf(glsl_type::uint_type); channel_mask = vgrf(glsl_type::uint_type);
} }
if (gs_compile->control_data_header_size_bits > 128) { if (gs_compile->control_data_header_size_bits > 128) {
opcode = SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT; opcode = SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL;
per_slot_offset = vgrf(glsl_type::uint_type); per_slot_offset = vgrf(glsl_type::uint_type);
} }
@@ -2308,7 +2308,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
* *
* dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex)) * dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex))
*/ */
if (opcode != SHADER_OPCODE_URB_WRITE_SIMD8) { if (opcode != SHADER_OPCODE_URB_WRITE_LOGICAL) {
fs_reg dword_index = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); fs_reg dword_index = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu)); abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu));
@@ -2616,7 +2616,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
if (first_component != 0) { if (first_component != 0) {
unsigned read_components = num_components + first_component; unsigned read_components = num_components + first_component;
fs_reg tmp = bld.vgrf(dst.type, read_components); fs_reg tmp = bld.vgrf(dst.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, icp_handle); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, icp_handle);
inst->size_written = read_components * inst->size_written = read_components *
tmp.component_size(inst->exec_size); tmp.component_size(inst->exec_size);
for (unsigned i = 0; i < num_components; i++) { for (unsigned i = 0; i < num_components; i++) {
@@ -2624,7 +2624,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
offset(tmp, bld, i + first_component)); offset(tmp, bld, i + first_component));
} }
} else { } else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst, icp_handle);
inst->size_written = num_components * inst->size_written = num_components *
dst.component_size(inst->exec_size); dst.component_size(inst->exec_size);
} }
@@ -2638,7 +2638,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
if (first_component != 0) { if (first_component != 0) {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp, inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
payload); payload);
inst->size_written = read_components * inst->size_written = read_components *
tmp.component_size(inst->exec_size); tmp.component_size(inst->exec_size);
@@ -2647,7 +2647,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
offset(tmp, bld, i + first_component)); offset(tmp, bld, i + first_component));
} }
} else { } else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload); inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst, payload);
inst->size_written = num_components * inst->size_written = num_components *
dst.component_size(inst->exec_size); dst.component_size(inst->exec_size);
} }
@@ -2921,13 +2921,13 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
if (first_component != 0) { if (first_component != 0) {
unsigned read_components = num_components + first_component; unsigned read_components = num_components + first_component;
fs_reg tmp = bld.vgrf(dst.type, read_components); fs_reg tmp = bld.vgrf(dst.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, icp_handle); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, icp_handle);
for (unsigned i = 0; i < num_components; i++) { for (unsigned i = 0; i < num_components; i++) {
bld.MOV(offset(dst, bld, i), bld.MOV(offset(dst, bld, i),
offset(tmp, bld, i + first_component)); offset(tmp, bld, i + first_component));
} }
} else { } else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle); inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst, icp_handle);
} }
inst->offset = imm_offset; inst->offset = imm_offset;
inst->mlen = 1; inst->mlen = 1;
@@ -2939,14 +2939,14 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
if (first_component != 0) { if (first_component != 0) {
unsigned read_components = num_components + first_component; unsigned read_components = num_components + first_component;
fs_reg tmp = bld.vgrf(dst.type, read_components); fs_reg tmp = bld.vgrf(dst.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp, inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
payload); payload);
for (unsigned i = 0; i < num_components; i++) { for (unsigned i = 0; i < num_components; i++) {
bld.MOV(offset(dst, bld, i), bld.MOV(offset(dst, bld, i),
offset(tmp, bld, i + first_component)); offset(tmp, bld, i + first_component));
} }
} else { } else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst,
payload); payload);
} }
inst->offset = imm_offset; inst->offset = imm_offset;
@@ -2990,7 +2990,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
unsigned read_components = unsigned read_components =
instr->num_components + first_component; instr->num_components + first_component;
fs_reg tmp = bld.vgrf(dst.type, read_components); fs_reg tmp = bld.vgrf(dst.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp,
patch_handle); patch_handle);
inst->size_written = read_components * REG_SIZE; inst->size_written = read_components * REG_SIZE;
for (unsigned i = 0; i < instr->num_components; i++) { for (unsigned i = 0; i < instr->num_components; i++) {
@@ -2998,7 +2998,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
offset(tmp, bld, i + first_component)); offset(tmp, bld, i + first_component));
} }
} else { } else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst,
patch_handle); patch_handle);
inst->size_written = instr->num_components * REG_SIZE; inst->size_written = instr->num_components * REG_SIZE;
} }
@@ -3014,7 +3014,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
unsigned read_components = unsigned read_components =
instr->num_components + first_component; instr->num_components + first_component;
fs_reg tmp = bld.vgrf(dst.type, read_components); fs_reg tmp = bld.vgrf(dst.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp, inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
payload); payload);
inst->size_written = read_components * REG_SIZE; inst->size_written = read_components * REG_SIZE;
for (unsigned i = 0; i < instr->num_components; i++) { for (unsigned i = 0; i < instr->num_components; i++) {
@@ -3022,7 +3022,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
offset(tmp, bld, i + first_component)); offset(tmp, bld, i + first_component));
} }
} else { } else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst,
payload); payload);
inst->size_written = instr->num_components * REG_SIZE; inst->size_written = instr->num_components * REG_SIZE;
} }
@@ -3064,12 +3064,12 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
if (mask != WRITEMASK_XYZW) { if (mask != WRITEMASK_XYZW) {
srcs[header_regs++] = brw_imm_ud(mask << 16); srcs[header_regs++] = brw_imm_ud(mask << 16);
opcode = indirect_offset.file != BAD_FILE ? opcode = indirect_offset.file != BAD_FILE ?
SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT : SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL :
SHADER_OPCODE_URB_WRITE_SIMD8_MASKED; SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL;
} else { } else {
opcode = indirect_offset.file != BAD_FILE ? opcode = indirect_offset.file != BAD_FILE ?
SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT : SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL :
SHADER_OPCODE_URB_WRITE_SIMD8; SHADER_OPCODE_URB_WRITE_LOGICAL;
} }
for (unsigned i = 0; i < num_components; i++) { for (unsigned i = 0; i < num_components; i++) {
@@ -3153,7 +3153,7 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
unsigned read_components = unsigned read_components =
instr->num_components + first_component; instr->num_components + first_component;
fs_reg tmp = bld.vgrf(dest.type, read_components); fs_reg tmp = bld.vgrf(dest.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp,
patch_handle); patch_handle);
inst->size_written = read_components * REG_SIZE; inst->size_written = read_components * REG_SIZE;
for (unsigned i = 0; i < instr->num_components; i++) { for (unsigned i = 0; i < instr->num_components; i++) {
@@ -3161,7 +3161,7 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
offset(tmp, bld, i + first_component)); offset(tmp, bld, i + first_component));
} }
} else { } else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest, inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dest,
patch_handle); patch_handle);
inst->size_written = instr->num_components * REG_SIZE; inst->size_written = instr->num_components * REG_SIZE;
} }
@@ -3187,14 +3187,14 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
unsigned read_components = unsigned read_components =
num_components + first_component; num_components + first_component;
fs_reg tmp = bld.vgrf(dest.type, read_components); fs_reg tmp = bld.vgrf(dest.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp, inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
payload); payload);
for (unsigned i = 0; i < num_components; i++) { for (unsigned i = 0; i < num_components; i++) {
bld.MOV(offset(dest, bld, i), bld.MOV(offset(dest, bld, i),
offset(tmp, bld, i + first_component)); offset(tmp, bld, i + first_component));
} }
} else { } else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dest, inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dest,
payload); payload);
} }
inst->mlen = 2; inst->mlen = 2;

View File

@@ -774,7 +774,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
else else
urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
opcode opcode = SHADER_OPCODE_URB_WRITE_SIMD8; opcode opcode = SHADER_OPCODE_URB_WRITE_LOGICAL;
int header_size = 1; int header_size = 1;
fs_reg per_slot_offsets; fs_reg per_slot_offsets;
@@ -794,7 +794,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
* Vertex Count. SIMD8 mode processes 8 different primitives at a * Vertex Count. SIMD8 mode processes 8 different primitives at a
* time; each may output a different number of vertices. * time; each may output a different number of vertices.
*/ */
opcode = SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT; opcode = SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL;
header_size++; header_size++;
/* The URB offset is in 128-bit units, so we need to multiply by 2 */ /* The URB offset is in 128-bit units, so we need to multiply by 2 */
@@ -941,7 +941,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
BRW_REGISTER_TYPE_F); BRW_REGISTER_TYPE_F);
payload_sources[0] = urb_handle; payload_sources[0] = urb_handle;
if (opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT) if (opcode == SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL)
payload_sources[1] = per_slot_offsets; payload_sources[1] = per_slot_offsets;
memcpy(&payload_sources[header_size], sources, memcpy(&payload_sources[header_size], sources,
@@ -988,7 +988,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
fs_reg payload = fs_reg(VGRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD); fs_reg payload = fs_reg(VGRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
bld.exec_all().MOV(payload, urb_handle); bld.exec_all().MOV(payload, urb_handle);
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, payload);
inst->eot = true; inst->eot = true;
inst->mlen = 2; inst->mlen = 2;
inst->offset = 1; inst->offset = 1;
@@ -1031,7 +1031,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
bld.exec_all().MOV(offset(payload, bld, 4), brw_imm_ud(0u)); bld.exec_all().MOV(offset(payload, bld, 4), brw_imm_ud(0u));
bld.exec_all().MOV(offset(payload, bld, 5), brw_imm_ud(0u)); bld.exec_all().MOV(offset(payload, bld, 5), brw_imm_ud(0u));
fs_inst *inst = bld.exec_all().emit(SHADER_OPCODE_URB_WRITE_SIMD8_MASKED, fs_inst *inst = bld.exec_all().emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
reg_undef, payload); reg_undef, payload);
inst->eot = true; inst->eot = true;
inst->mlen = 6; inst->mlen = 6;

View File

@@ -30,6 +30,18 @@
using namespace brw; using namespace brw;
/**
 * Placeholder lowering for the logical URB read opcodes.
 *
 * No payload is built here: the instruction is simply retagged with the
 * opcode passed in \p op.
 *
 * \param bld   Builder for the instruction's location; currently unused.
 * \param inst  Instruction to rewrite in place.
 * \param op    Opcode that replaces the instruction's current opcode.
 */
static void
lower_urb_read_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
{
   /* Nothing is emitted yet, so the builder is not needed. */
   (void) bld;

   inst->opcode = op;
}
/**
 * Placeholder lowering for the logical URB write opcodes.
 *
 * No payload is built here: the instruction is simply retagged with the
 * opcode passed in \p op.
 *
 * \param bld   Builder for the instruction's location; currently unused.
 * \param inst  Instruction to rewrite in place.
 * \param op    Opcode that replaces the instruction's current opcode.
 */
static void
lower_urb_write_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
{
   /* Nothing is emitted yet, so the builder is not needed. */
   (void) bld;

   inst->opcode = op;
}
static void static void
setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key, setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
fs_reg *dst, fs_reg color, unsigned components) fs_reg *dst, fs_reg color, unsigned components)
@@ -2629,6 +2641,26 @@ fs_visitor::lower_logical_sends()
lower_trace_ray_logical_send(ibld, inst); lower_trace_ray_logical_send(ibld, inst);
break; break;
case SHADER_OPCODE_URB_READ_LOGICAL:
lower_urb_read_logical_send(ibld, inst, SHADER_OPCODE_URB_READ_SIMD8);
break;
case SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL:
lower_urb_read_logical_send(ibld, inst, SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT);
break;
case SHADER_OPCODE_URB_WRITE_LOGICAL:
lower_urb_write_logical_send(ibld, inst, SHADER_OPCODE_URB_WRITE_SIMD8);
break;
case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
lower_urb_write_logical_send(ibld, inst, SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT);
break;
case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
lower_urb_write_logical_send(ibld, inst, SHADER_OPCODE_URB_WRITE_SIMD8_MASKED);
break;
case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
lower_urb_write_logical_send(ibld, inst, SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT);
break;
default: default:
continue; continue;
} }

View File

@@ -908,7 +908,8 @@ emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, p); fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, p);
bld8.LOAD_PAYLOAD(payload, payload_srcs, p, header_size); bld8.LOAD_PAYLOAD(payload, payload_srcs, p, header_size);
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_SIMD8_MASKED, reg_undef, payload); fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
reg_undef, payload);
inst->mlen = p; inst->mlen = p;
inst->offset = urb_global_offset; inst->offset = urb_global_offset;
assert(inst->offset < 2048); assert(inst->offset < 2048);
@@ -935,7 +936,8 @@ emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, p); fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, p);
bld8.LOAD_PAYLOAD(payload, payload_srcs, p, header_size); bld8.LOAD_PAYLOAD(payload, payload_srcs, p, header_size);
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_SIMD8_MASKED, reg_undef, payload); fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
reg_undef, payload);
inst->mlen = p; inst->mlen = p;
inst->offset = urb_global_offset; inst->offset = urb_global_offset;
assert(inst->offset < 2048); assert(inst->offset < 2048);
@@ -998,7 +1000,8 @@ emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, x); fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, x);
bld8.LOAD_PAYLOAD(payload, payload_srcs, x, 3); bld8.LOAD_PAYLOAD(payload, payload_srcs, x, 3);
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT, reg_undef, payload); fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL,
reg_undef, payload);
inst->mlen = x; inst->mlen = x;
inst->offset = 0; inst->offset = 0;
} }
@@ -1033,7 +1036,7 @@ emit_urb_direct_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
fs_builder ubld8 = bld.group(8, 0).exec_all(); fs_builder ubld8 = bld.group(8, 0).exec_all();
fs_reg data = ubld8.vgrf(BRW_REGISTER_TYPE_UD, num_regs); fs_reg data = ubld8.vgrf(BRW_REGISTER_TYPE_UD, num_regs);
fs_inst *inst = ubld8.emit(SHADER_OPCODE_URB_READ_SIMD8, data, urb_handle); fs_inst *inst = ubld8.emit(SHADER_OPCODE_URB_READ_LOGICAL, data, urb_handle);
inst->mlen = 1; inst->mlen = 1;
inst->offset = urb_global_offset; inst->offset = urb_global_offset;
assert(inst->offset < 2048); assert(inst->offset < 2048);
@@ -1097,7 +1100,8 @@ emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
fs_reg data = bld8.vgrf(BRW_REGISTER_TYPE_UD, 4); fs_reg data = bld8.vgrf(BRW_REGISTER_TYPE_UD, 4);
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, data, payload); fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL,
data, payload);
inst->mlen = 2; inst->mlen = 2;
inst->offset = 0; inst->offset = 0;
inst->size_written = 4 * REG_SIZE; inst->size_written = 4 * REG_SIZE;

View File

@@ -384,6 +384,19 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
return "urb_read_simd8_per_slot"; return "urb_read_simd8_per_slot";
case SHADER_OPCODE_URB_WRITE_LOGICAL:
return "urb_write_logical";
case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
return "urb_write_per_slot_logical";
case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
return "urb_write_masked_logical";
case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
return "urb_write_masked_per_slot_logical";
case SHADER_OPCODE_URB_READ_LOGICAL:
return "urb_read_logical";
case SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL:
return "urb_read_per_slot_logical";
case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_FIND_LIVE_CHANNEL:
return "find_live_channel"; return "find_live_channel";
case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
@@ -1139,6 +1152,10 @@ backend_instruction::has_side_effects() const
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_LOGICAL:
case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
case FS_OPCODE_FB_WRITE: case FS_OPCODE_FB_WRITE:
case FS_OPCODE_FB_WRITE_LOGICAL: case FS_OPCODE_FB_WRITE_LOGICAL:
case FS_OPCODE_REP_FB_WRITE: case FS_OPCODE_REP_FB_WRITE: