intel/fs: Make logical URB write instructions more like other logical instructions
The changes to fs_visitor::validate() helped track down a place where I initially forgot to convert a message to the new sources layout. This had caused a different validation failure in dEQP-GLES31.functional.tessellation.tesscoord.triangles_equal_spacing, but this was not detected until after SENDs were lowered. Tiger Lake, Ice Lake, and Skylake had similar results. (Ice Lake shown) total instructions in shared programs: 19951145 -> 19951133 (<.01%) instructions in affected programs: 2429 -> 2417 (-0.49%) helped: 8 / HURT: 0 total cycles in shared programs: 858904152 -> 858862331 (<.01%) cycles in affected programs: 5702652 -> 5660831 (-0.73%) helped: 2138 / HURT: 1255 Broadwell total cycles in shared programs: 904869459 -> 904835501 (<.01%) cycles in affected programs: 7686744 -> 7652786 (-0.44%) helped: 2861 / HURT: 2050 Tiger Lake, Ice Lake, and Skylake had similar results. (Ice Lake shown) Instructions in all programs: 141442369 -> 141442032 (-0.0%) Instructions helped: 337 Cycles in all programs: 9099270231 -> 9099036492 (-0.0%) Cycles helped: 40661 Cycles hurt: 28606 Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17605>
This commit is contained in:
@@ -950,6 +950,17 @@ enum rt_logical_srcs {
|
||||
RT_LOGICAL_NUM_SRCS
|
||||
};
|
||||
|
||||
/**
 * Source slots of the logical URB write instructions.
 *
 * Emitters fill an fs_reg array of URB_LOGICAL_NUM_SRCS entries indexed by
 * these values; optional slots are left as BAD_FILE when unused.
 */
enum urb_logical_srcs {
|
||||
/** URB handle used by the message. */
URB_LOGICAL_SRC_HANDLE,
|
||||
/** Per-slot offsets. BAD_FILE when the message carries none. */
URB_LOGICAL_SRC_PER_SLOT_OFFSETS,
|
||||
/** Channel mask. BAD_FILE when the message carries none. */
URB_LOGICAL_SRC_CHANNEL_MASK,
|
||||
/** Data to be written. BAD_FILE for reads. */
|
||||
URB_LOGICAL_SRC_DATA,
|
||||
|
||||
/* Number of source slots; used to size the srcs[] arrays. */
URB_LOGICAL_NUM_SRCS
|
||||
};
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
/**
|
||||
* Allow brw_urb_write_flags enums to be ORed together.
|
||||
|
@@ -863,6 +863,17 @@ fs_inst::components_read(unsigned i) const
|
||||
return 1;
|
||||
}
|
||||
|
||||
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
|
||||
if (i == URB_LOGICAL_SRC_DATA)
|
||||
return mlen - 1 -
|
||||
unsigned(src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS].file != BAD_FILE) -
|
||||
unsigned(src[URB_LOGICAL_SRC_CHANNEL_MASK].file != BAD_FILE);
|
||||
else
|
||||
return 1;
|
||||
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
@@ -891,10 +902,6 @@ fs_inst::size_read(int arg) const
|
||||
break;
|
||||
|
||||
case FS_OPCODE_FB_READ:
|
||||
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
|
||||
case SHADER_OPCODE_URB_READ_LOGICAL:
|
||||
case SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL:
|
||||
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
||||
@@ -1546,17 +1553,17 @@ fs_visitor::emit_gs_thread_end()
|
||||
break;
|
||||
}
|
||||
}
|
||||
fs_reg hdr = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
|
||||
abld.MOV(hdr, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)));
|
||||
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, hdr);
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = 1;
|
||||
} else {
|
||||
fs_reg payload = abld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
||||
fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2);
|
||||
sources[0] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
sources[1] = this->final_gs_vertex_count;
|
||||
abld.LOAD_PAYLOAD(payload, sources, 2, 2);
|
||||
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, payload);
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
srcs[URB_LOGICAL_SRC_DATA] = this->final_gs_vertex_count;
|
||||
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = 2;
|
||||
}
|
||||
inst->eot = true;
|
||||
@@ -6676,16 +6683,12 @@ fs_visitor::run_tcs()
|
||||
}
|
||||
|
||||
/* Emit EOT write; set TR DS Cache bit */
|
||||
fs_reg srcs[3] = {
|
||||
fs_reg(get_tcs_output_urb_handle()),
|
||||
fs_reg(brw_imm_ud(WRITEMASK_X << 16)),
|
||||
fs_reg(brw_imm_ud(0)),
|
||||
};
|
||||
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 3);
|
||||
bld.LOAD_PAYLOAD(payload, srcs, 3, 2);
|
||||
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = get_tcs_output_urb_handle();
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
|
||||
srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
|
||||
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
|
||||
bld.null_reg_ud(), payload);
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = 3;
|
||||
inst->eot = true;
|
||||
|
||||
|
@@ -2341,27 +2341,27 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
|
||||
}
|
||||
|
||||
/* Store the control data bits in the message payload and send it. */
|
||||
unsigned mlen = 2;
|
||||
if (channel_mask.file != BAD_FILE)
|
||||
mlen += 4; /* channel masks, plus 3 extra copies of the data */
|
||||
if (per_slot_offset.file != BAD_FILE)
|
||||
mlen++;
|
||||
const unsigned header_size = 1 + unsigned(channel_mask.file != BAD_FILE) +
|
||||
unsigned(per_slot_offset.file != BAD_FILE);
|
||||
|
||||
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, mlen);
|
||||
fs_reg *sources = ralloc_array(mem_ctx, fs_reg, mlen);
|
||||
unsigned i = 0;
|
||||
sources[i++] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
if (per_slot_offset.file != BAD_FILE)
|
||||
sources[i++] = per_slot_offset;
|
||||
if (channel_mask.file != BAD_FILE)
|
||||
sources[i++] = channel_mask;
|
||||
while (i < mlen) {
|
||||
sources[i++] = this->control_data_bits;
|
||||
}
|
||||
/* If there are channel masks, add 3 extra copies of the data. */
|
||||
const unsigned length = 1 + 3 * unsigned(channel_mask.file != BAD_FILE);
|
||||
|
||||
abld.LOAD_PAYLOAD(payload, sources, mlen, mlen);
|
||||
fs_inst *inst = abld.emit(opcode, reg_undef, payload);
|
||||
inst->mlen = mlen;
|
||||
fs_reg sources[4];
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
|
||||
sources[i] = this->control_data_bits;
|
||||
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offset;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = channel_mask;
|
||||
srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, alloc.allocate(length),
|
||||
BRW_REGISTER_TYPE_F);
|
||||
abld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0);
|
||||
|
||||
fs_inst *inst = abld.emit(opcode, reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = header_size + length;
|
||||
/* We need to increment Global Offset by 256-bits to make room for
|
||||
* Broadwell's extra "Vertex Count" payload at the beginning of the
|
||||
* URB entry. Since this is an OWord message, Global Offset is counted
|
||||
@@ -3046,15 +3046,6 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
||||
fs_reg indirect_offset = get_indirect_offset(instr);
|
||||
unsigned imm_offset = instr->const_index[0];
|
||||
unsigned mask = instr->const_index[1];
|
||||
unsigned header_regs = 0;
|
||||
struct brw_reg output_handles = get_tcs_output_urb_handle();
|
||||
|
||||
fs_reg srcs[7];
|
||||
srcs[header_regs++] = output_handles;
|
||||
|
||||
if (indirect_offset.file != BAD_FILE) {
|
||||
srcs[header_regs++] = indirect_offset;
|
||||
}
|
||||
|
||||
if (mask == 0)
|
||||
break;
|
||||
@@ -3068,8 +3059,9 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
||||
unsigned first_component = nir_intrinsic_component(instr);
|
||||
mask = mask << first_component;
|
||||
|
||||
fs_reg mask_reg;
|
||||
if (mask != WRITEMASK_XYZW) {
|
||||
srcs[header_regs++] = brw_imm_ud(mask << 16);
|
||||
mask_reg = brw_imm_ud(mask << 16);
|
||||
opcode = indirect_offset.file != BAD_FILE ?
|
||||
SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL :
|
||||
SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL;
|
||||
@@ -3079,21 +3071,30 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
||||
SHADER_OPCODE_URB_WRITE_LOGICAL;
|
||||
}
|
||||
|
||||
fs_reg sources[4];
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
if (!(mask & (1 << (i + first_component))))
|
||||
continue;
|
||||
|
||||
srcs[header_regs + i + first_component] = offset(value, bld, i);
|
||||
sources[i + first_component] = offset(value, bld, i);
|
||||
}
|
||||
|
||||
unsigned mlen = header_regs + num_components + first_component;
|
||||
fs_reg payload =
|
||||
bld.vgrf(BRW_REGISTER_TYPE_UD, mlen);
|
||||
bld.LOAD_PAYLOAD(payload, srcs, mlen, header_regs);
|
||||
unsigned header_size = 1 + unsigned(indirect_offset.file != BAD_FILE) +
|
||||
unsigned(mask != WRITEMASK_XYZW);
|
||||
const unsigned length = num_components + first_component;
|
||||
|
||||
fs_inst *inst = bld.emit(opcode, bld.null_reg_ud(), payload);
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = get_tcs_output_urb_handle();
|
||||
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask_reg;
|
||||
srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, alloc.allocate(length),
|
||||
BRW_REGISTER_TYPE_F);
|
||||
bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0);
|
||||
|
||||
fs_inst *inst = bld.emit(opcode, reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->offset = imm_offset;
|
||||
inst->mlen = mlen;
|
||||
inst->mlen = header_size + length;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@@ -43,6 +43,20 @@ fs_visitor::validate()
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
foreach_block_and_inst (block, fs_inst, inst, cfg) {
|
||||
if (inst->opcode == SHADER_OPCODE_URB_WRITE_LOGICAL) {
|
||||
const unsigned header_size = 1 +
|
||||
unsigned(inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS].file != BAD_FILE) +
|
||||
unsigned(inst->src[URB_LOGICAL_SRC_CHANNEL_MASK].file != BAD_FILE);
|
||||
|
||||
unsigned data_size = 0;
|
||||
for (unsigned i = header_size, j = 0; i < inst->mlen; i++, j++) {
|
||||
fsv_assert(type_sz(offset(inst->src[URB_LOGICAL_SRC_DATA], bld, j).type) == 4);
|
||||
data_size++;
|
||||
}
|
||||
|
||||
fsv_assert(header_size + data_size == inst->mlen);
|
||||
}
|
||||
|
||||
if (inst->dst.file == VGRF) {
|
||||
fsv_assert(inst->dst.offset / REG_SIZE + regs_written(inst) <=
|
||||
alloc.sizes[inst->dst.nr]);
|
||||
|
@@ -935,22 +935,15 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||
if (length == 8 || (length > 0 && slot == last_slot))
|
||||
flush = true;
|
||||
if (flush) {
|
||||
fs_reg *payload_sources =
|
||||
ralloc_array(mem_ctx, fs_reg, length + header_size);
|
||||
fs_reg payload = fs_reg(VGRF, alloc.allocate(length + header_size),
|
||||
BRW_REGISTER_TYPE_F);
|
||||
payload_sources[0] = urb_handle;
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
|
||||
if (opcode == SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL)
|
||||
payload_sources[1] = per_slot_offsets;
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
||||
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offsets;
|
||||
srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, alloc.allocate(length),
|
||||
BRW_REGISTER_TYPE_F);
|
||||
abld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0);
|
||||
|
||||
memcpy(&payload_sources[header_size], sources,
|
||||
length * sizeof sources[0]);
|
||||
|
||||
abld.LOAD_PAYLOAD(payload, payload_sources, length + header_size,
|
||||
header_size);
|
||||
|
||||
fs_inst *inst = abld.emit(opcode, reg_undef, payload);
|
||||
fs_inst *inst = abld.emit(opcode, reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
|
||||
/* For ICL WA 1805992985 one needs additional write in the end. */
|
||||
if (devinfo->ver == 11 && stage == MESA_SHADER_TESS_EVAL)
|
||||
@@ -985,10 +978,17 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||
if (stage == MESA_SHADER_GEOMETRY)
|
||||
return;
|
||||
|
||||
fs_reg payload = fs_reg(VGRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
|
||||
bld.exec_all().MOV(payload, urb_handle);
|
||||
fs_reg uniform_urb_handle = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
|
||||
fs_reg payload = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
|
||||
|
||||
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, payload);
|
||||
bld.exec_all().MOV(uniform_urb_handle, urb_handle);
|
||||
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle;
|
||||
srcs[URB_LOGICAL_SRC_DATA] = payload;
|
||||
|
||||
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
inst->eot = true;
|
||||
inst->mlen = 2;
|
||||
inst->offset = 1;
|
||||
@@ -1002,14 +1002,16 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||
* all 8 lanes must be valid.
|
||||
*/
|
||||
if (devinfo->ver == 11 && stage == MESA_SHADER_TESS_EVAL) {
|
||||
fs_reg payload = fs_reg(VGRF, alloc.allocate(6), BRW_REGISTER_TYPE_UD);
|
||||
fs_reg uniform_urb_handle = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
|
||||
fs_reg uniform_mask = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
|
||||
fs_reg payload = fs_reg(VGRF, alloc.allocate(4), BRW_REGISTER_TYPE_UD);
|
||||
|
||||
/* Workaround requires all 8 channels (lanes) to be valid. This is
|
||||
* understood to mean they all need to be alive. First trick is to find
|
||||
* a live channel and copy its urb handle for all the other channels to
|
||||
* make sure all handles are valid.
|
||||
*/
|
||||
bld.exec_all().MOV(payload, bld.emit_uniformize(urb_handle));
|
||||
bld.exec_all().MOV(uniform_urb_handle, bld.emit_uniformize(urb_handle));
|
||||
|
||||
/* Second trick is to use masked URB write where one can tell the HW to
|
||||
* actually write data only for selected channels even though all are
|
||||
@@ -1025,14 +1027,19 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||
* 4 slots data. All are explicitly zeros in order to keep the MBZ
|
||||
* area written as zeros.
|
||||
*/
|
||||
bld.exec_all().MOV(offset(payload, bld, 1), brw_imm_ud(0x10000u));
|
||||
bld.exec_all().MOV(uniform_mask, brw_imm_ud(0x10000u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 0), brw_imm_ud(0u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 1), brw_imm_ud(0u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 2), brw_imm_ud(0u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 3), brw_imm_ud(0u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 4), brw_imm_ud(0u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 5), brw_imm_ud(0u));
|
||||
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = uniform_mask;
|
||||
srcs[URB_LOGICAL_SRC_DATA] = payload;
|
||||
|
||||
fs_inst *inst = bld.exec_all().emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
|
||||
reg_undef, payload);
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->eot = true;
|
||||
inst->mlen = 6;
|
||||
inst->offset = 0;
|
||||
|
@@ -73,8 +73,27 @@ lower_urb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||
|
||||
assert(inst->header_size == 0);
|
||||
|
||||
fs_reg *payload_sources = new fs_reg[inst->mlen];
|
||||
fs_reg payload = fs_reg(VGRF, bld.shader->alloc.allocate(inst->mlen),
|
||||
BRW_REGISTER_TYPE_F);
|
||||
|
||||
unsigned header_size = 0;
|
||||
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_HANDLE];
|
||||
if (per_slot_present)
|
||||
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
|
||||
|
||||
if (channel_mask_present)
|
||||
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK];
|
||||
|
||||
for (unsigned i = header_size, j = 0; i < inst->mlen; i++, j++)
|
||||
payload_sources[i] = offset(inst->src[URB_LOGICAL_SRC_DATA], bld, j);
|
||||
|
||||
bld.LOAD_PAYLOAD(payload, payload_sources, inst->mlen, header_size);
|
||||
|
||||
delete [] payload_sources;
|
||||
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->header_size = 1;
|
||||
inst->header_size = header_size;
|
||||
inst->dst = brw_null_reg();
|
||||
|
||||
inst->sfid = BRW_SFID_URB;
|
||||
@@ -88,13 +107,11 @@ lower_urb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||
inst->ex_mlen = 0;
|
||||
inst->send_has_side_effects = true;
|
||||
|
||||
fs_reg tmp = inst->src[0];
|
||||
|
||||
inst->resize_sources(4);
|
||||
|
||||
inst->src[0] = brw_imm_ud(0); /* desc */
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
inst->src[2] = tmp;
|
||||
inst->src[2] = payload;
|
||||
inst->src[3] = brw_null_reg();
|
||||
}
|
||||
|
||||
|
@@ -892,25 +892,25 @@ emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
|
||||
for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
|
||||
fs_builder bld8 = bld.group(8, q);
|
||||
|
||||
fs_reg payload_srcs[6];
|
||||
unsigned p = 0;
|
||||
|
||||
payload_srcs[p++] = urb_handle;
|
||||
payload_srcs[p++] = brw_imm_ud(first_mask << 16);
|
||||
const unsigned header_size = p;
|
||||
fs_reg payload_srcs[4];
|
||||
unsigned length = 0;
|
||||
|
||||
for (unsigned i = 0; i < comp_shift; i++)
|
||||
payload_srcs[p++] = reg_undef;
|
||||
payload_srcs[length++] = reg_undef;
|
||||
|
||||
for (unsigned c = 0; c < first_comps; c++)
|
||||
payload_srcs[p++] = quarter(offset(src, bld, c), q);
|
||||
payload_srcs[length++] = quarter(offset(src, bld, c), q);
|
||||
|
||||
fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, p);
|
||||
bld8.LOAD_PAYLOAD(payload, payload_srcs, p, header_size);
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(first_mask << 16);
|
||||
srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, bld.shader->alloc.allocate(length),
|
||||
BRW_REGISTER_TYPE_F);
|
||||
bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
|
||||
|
||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
|
||||
reg_undef, payload);
|
||||
inst->mlen = p;
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = 2 + length;
|
||||
inst->offset = urb_global_offset;
|
||||
assert(inst->offset < 2048);
|
||||
}
|
||||
@@ -923,22 +923,22 @@ emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
|
||||
for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
|
||||
fs_builder bld8 = bld.group(8, q);
|
||||
|
||||
fs_reg payload_srcs[6];
|
||||
unsigned p = 0;
|
||||
|
||||
payload_srcs[p++] = urb_handle;
|
||||
payload_srcs[p++] = brw_imm_ud(second_mask << 16);
|
||||
const unsigned header_size = p;
|
||||
fs_reg payload_srcs[4];
|
||||
unsigned length = 0;
|
||||
|
||||
for (unsigned c = 0; c < second_comps; c++)
|
||||
payload_srcs[p++] = quarter(offset(src, bld, c + first_comps), q);
|
||||
payload_srcs[length++] = quarter(offset(src, bld, c + first_comps), q);
|
||||
|
||||
fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, p);
|
||||
bld8.LOAD_PAYLOAD(payload, payload_srcs, p, header_size);
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(second_mask << 16);
|
||||
srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, bld.shader->alloc.allocate(length),
|
||||
BRW_REGISTER_TYPE_F);
|
||||
bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
|
||||
|
||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
|
||||
reg_undef, payload);
|
||||
inst->mlen = p;
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = 2 + length;
|
||||
inst->offset = urb_global_offset;
|
||||
assert(inst->offset < 2048);
|
||||
}
|
||||
@@ -988,21 +988,23 @@ emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
|
||||
|
||||
bld8.SHR(off, off, brw_imm_ud(2));
|
||||
|
||||
fs_reg payload_srcs[7];
|
||||
int x = 0;
|
||||
payload_srcs[x++] = urb_handle;
|
||||
payload_srcs[x++] = off;
|
||||
payload_srcs[x++] = mask;
|
||||
fs_reg payload_srcs[4];
|
||||
unsigned length = 0;
|
||||
|
||||
for (unsigned j = 0; j < 4; j++)
|
||||
payload_srcs[x++] = quarter(src_comp, q);
|
||||
payload_srcs[length++] = quarter(src_comp, q);
|
||||
|
||||
fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, x);
|
||||
bld8.LOAD_PAYLOAD(payload, payload_srcs, x, 3);
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
||||
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask;
|
||||
srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, bld.shader->alloc.allocate(length),
|
||||
BRW_REGISTER_TYPE_F);
|
||||
bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
|
||||
|
||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL,
|
||||
reg_undef, payload);
|
||||
inst->mlen = x;
|
||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = 3 + length;
|
||||
inst->offset = 0;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user