intel/fs: Make logical URB read instructions more like other logical instructions
No shader-db changes on any Intel platform.

Fossil-db results:

Tiger Lake
Instructions in all programs: 156926440 -> 156926470 (+0.0%)
Instructions hurt: 15
Cycles in all programs: 7513099349 -> 7513099402 (+0.0%)
Cycles hurt: 15

Ice Lake and Skylake had similar results. (Ice Lake shown)
Cycles in all programs: 9099036492 -> 9099036489 (-0.0%)
Cycles helped: 1

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17605>
This commit is contained in:
@@ -902,8 +902,6 @@ fs_inst::size_read(int arg) const
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case FS_OPCODE_FB_READ:
|
case FS_OPCODE_FB_READ:
|
||||||
case SHADER_OPCODE_URB_READ_LOGICAL:
|
|
||||||
case SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL:
|
|
||||||
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
||||||
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
||||||
if (arg == 0)
|
if (arg == 0)
|
||||||
|
@@ -2619,11 +2619,15 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
|
|||||||
fs_reg indirect_offset = get_nir_src(offset_src);
|
fs_reg indirect_offset = get_nir_src(offset_src);
|
||||||
|
|
||||||
if (nir_src_is_const(offset_src)) {
|
if (nir_src_is_const(offset_src)) {
|
||||||
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
|
srcs[URB_LOGICAL_SRC_HANDLE] = icp_handle;
|
||||||
|
|
||||||
/* Constant indexing - use global offset. */
|
/* Constant indexing - use global offset. */
|
||||||
if (first_component != 0) {
|
if (first_component != 0) {
|
||||||
unsigned read_components = num_components + first_component;
|
unsigned read_components = num_components + first_component;
|
||||||
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, icp_handle);
|
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, srcs,
|
||||||
|
ARRAY_SIZE(srcs));
|
||||||
inst->size_written = read_components *
|
inst->size_written = read_components *
|
||||||
tmp.component_size(inst->exec_size);
|
tmp.component_size(inst->exec_size);
|
||||||
for (unsigned i = 0; i < num_components; i++) {
|
for (unsigned i = 0; i < num_components; i++) {
|
||||||
@@ -2631,7 +2635,8 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
|
|||||||
offset(tmp, bld, i + first_component));
|
offset(tmp, bld, i + first_component));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst, icp_handle);
|
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst, srcs,
|
||||||
|
ARRAY_SIZE(srcs));
|
||||||
inst->size_written = num_components *
|
inst->size_written = num_components *
|
||||||
dst.component_size(inst->exec_size);
|
dst.component_size(inst->exec_size);
|
||||||
}
|
}
|
||||||
@@ -2639,14 +2644,16 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
|
|||||||
inst->mlen = 1;
|
inst->mlen = 1;
|
||||||
} else {
|
} else {
|
||||||
/* Indirect indexing - use per-slot offsets as well. */
|
/* Indirect indexing - use per-slot offsets as well. */
|
||||||
const fs_reg srcs[] = { icp_handle, indirect_offset };
|
|
||||||
unsigned read_components = num_components + first_component;
|
unsigned read_components = num_components + first_component;
|
||||||
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
||||||
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
|
||||||
bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
|
srcs[URB_LOGICAL_SRC_HANDLE] = icp_handle;
|
||||||
|
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset;
|
||||||
|
|
||||||
if (first_component != 0) {
|
if (first_component != 0) {
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
|
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
|
||||||
payload);
|
srcs, ARRAY_SIZE(srcs));
|
||||||
inst->size_written = read_components *
|
inst->size_written = read_components *
|
||||||
tmp.component_size(inst->exec_size);
|
tmp.component_size(inst->exec_size);
|
||||||
for (unsigned i = 0; i < num_components; i++) {
|
for (unsigned i = 0; i < num_components; i++) {
|
||||||
@@ -2654,7 +2661,8 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
|
|||||||
offset(tmp, bld, i + first_component));
|
offset(tmp, bld, i + first_component));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst, payload);
|
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst,
|
||||||
|
srcs, ARRAY_SIZE(srcs));
|
||||||
inst->size_written = num_components *
|
inst->size_written = num_components *
|
||||||
dst.component_size(inst->exec_size);
|
dst.component_size(inst->exec_size);
|
||||||
}
|
}
|
||||||
@@ -2923,38 +2931,42 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||||||
unsigned num_components = instr->num_components;
|
unsigned num_components = instr->num_components;
|
||||||
unsigned first_component = nir_intrinsic_component(instr);
|
unsigned first_component = nir_intrinsic_component(instr);
|
||||||
|
|
||||||
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
|
srcs[URB_LOGICAL_SRC_HANDLE] = icp_handle;
|
||||||
|
|
||||||
if (indirect_offset.file == BAD_FILE) {
|
if (indirect_offset.file == BAD_FILE) {
|
||||||
/* Constant indexing - use global offset. */
|
/* Constant indexing - use global offset. */
|
||||||
if (first_component != 0) {
|
if (first_component != 0) {
|
||||||
unsigned read_components = num_components + first_component;
|
unsigned read_components = num_components + first_component;
|
||||||
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, icp_handle);
|
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp, srcs,
|
||||||
|
ARRAY_SIZE(srcs));
|
||||||
for (unsigned i = 0; i < num_components; i++) {
|
for (unsigned i = 0; i < num_components; i++) {
|
||||||
bld.MOV(offset(dst, bld, i),
|
bld.MOV(offset(dst, bld, i),
|
||||||
offset(tmp, bld, i + first_component));
|
offset(tmp, bld, i + first_component));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst, icp_handle);
|
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst, srcs,
|
||||||
|
ARRAY_SIZE(srcs));
|
||||||
}
|
}
|
||||||
inst->offset = imm_offset;
|
inst->offset = imm_offset;
|
||||||
inst->mlen = 1;
|
inst->mlen = 1;
|
||||||
} else {
|
} else {
|
||||||
/* Indirect indexing - use per-slot offsets as well. */
|
/* Indirect indexing - use per-slot offsets as well. */
|
||||||
const fs_reg srcs[] = { icp_handle, indirect_offset };
|
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset;
|
||||||
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
|
||||||
bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
|
|
||||||
if (first_component != 0) {
|
if (first_component != 0) {
|
||||||
unsigned read_components = num_components + first_component;
|
unsigned read_components = num_components + first_component;
|
||||||
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
|
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
|
||||||
payload);
|
srcs, ARRAY_SIZE(srcs));
|
||||||
for (unsigned i = 0; i < num_components; i++) {
|
for (unsigned i = 0; i < num_components; i++) {
|
||||||
bld.MOV(offset(dst, bld, i),
|
bld.MOV(offset(dst, bld, i),
|
||||||
offset(tmp, bld, i + first_component));
|
offset(tmp, bld, i + first_component));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst,
|
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst,
|
||||||
payload);
|
srcs, ARRAY_SIZE(srcs));
|
||||||
}
|
}
|
||||||
inst->offset = imm_offset;
|
inst->offset = imm_offset;
|
||||||
inst->mlen = 2;
|
inst->mlen = 2;
|
||||||
@@ -2993,12 +3005,15 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||||||
bld.MOV(patch_handle, output_handles);
|
bld.MOV(patch_handle, output_handles);
|
||||||
|
|
||||||
{
|
{
|
||||||
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
|
srcs[URB_LOGICAL_SRC_HANDLE] = patch_handle;
|
||||||
|
|
||||||
if (first_component != 0) {
|
if (first_component != 0) {
|
||||||
unsigned read_components =
|
unsigned read_components =
|
||||||
instr->num_components + first_component;
|
instr->num_components + first_component;
|
||||||
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp,
|
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp,
|
||||||
patch_handle);
|
srcs, ARRAY_SIZE(srcs));
|
||||||
inst->size_written = read_components * REG_SIZE;
|
inst->size_written = read_components * REG_SIZE;
|
||||||
for (unsigned i = 0; i < instr->num_components; i++) {
|
for (unsigned i = 0; i < instr->num_components; i++) {
|
||||||
bld.MOV(offset(dst, bld, i),
|
bld.MOV(offset(dst, bld, i),
|
||||||
@@ -3006,7 +3021,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst,
|
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst,
|
||||||
patch_handle);
|
srcs, ARRAY_SIZE(srcs));
|
||||||
inst->size_written = instr->num_components * REG_SIZE;
|
inst->size_written = instr->num_components * REG_SIZE;
|
||||||
}
|
}
|
||||||
inst->offset = imm_offset;
|
inst->offset = imm_offset;
|
||||||
@@ -3014,15 +3029,16 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* Indirect indexing - use per-slot offsets as well. */
|
/* Indirect indexing - use per-slot offsets as well. */
|
||||||
const fs_reg srcs[] = { output_handles, indirect_offset };
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
srcs[URB_LOGICAL_SRC_HANDLE] = output_handles;
|
||||||
bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
|
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset;
|
||||||
|
|
||||||
if (first_component != 0) {
|
if (first_component != 0) {
|
||||||
unsigned read_components =
|
unsigned read_components =
|
||||||
instr->num_components + first_component;
|
instr->num_components + first_component;
|
||||||
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
|
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
|
||||||
payload);
|
srcs, ARRAY_SIZE(srcs));
|
||||||
inst->size_written = read_components * REG_SIZE;
|
inst->size_written = read_components * REG_SIZE;
|
||||||
for (unsigned i = 0; i < instr->num_components; i++) {
|
for (unsigned i = 0; i < instr->num_components; i++) {
|
||||||
bld.MOV(offset(dst, bld, i),
|
bld.MOV(offset(dst, bld, i),
|
||||||
@@ -3030,7 +3046,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst,
|
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst,
|
||||||
payload);
|
srcs, ARRAY_SIZE(srcs));
|
||||||
inst->size_written = instr->num_components * REG_SIZE;
|
inst->size_written = instr->num_components * REG_SIZE;
|
||||||
}
|
}
|
||||||
inst->offset = imm_offset;
|
inst->offset = imm_offset;
|
||||||
@@ -3151,18 +3167,16 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
|
|||||||
(imm_offset / 2) + 1);
|
(imm_offset / 2) + 1);
|
||||||
} else {
|
} else {
|
||||||
/* Replicate the patch handle to all enabled channels */
|
/* Replicate the patch handle to all enabled channels */
|
||||||
const fs_reg srcs[] = {
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)
|
srcs[URB_LOGICAL_SRC_HANDLE] =
|
||||||
};
|
retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD);
|
||||||
fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
|
|
||||||
bld.LOAD_PAYLOAD(patch_handle, srcs, ARRAY_SIZE(srcs), 0);
|
|
||||||
|
|
||||||
if (first_component != 0) {
|
if (first_component != 0) {
|
||||||
unsigned read_components =
|
unsigned read_components =
|
||||||
instr->num_components + first_component;
|
instr->num_components + first_component;
|
||||||
fs_reg tmp = bld.vgrf(dest.type, read_components);
|
fs_reg tmp = bld.vgrf(dest.type, read_components);
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp,
|
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp,
|
||||||
patch_handle);
|
srcs, ARRAY_SIZE(srcs));
|
||||||
inst->size_written = read_components * REG_SIZE;
|
inst->size_written = read_components * REG_SIZE;
|
||||||
for (unsigned i = 0; i < instr->num_components; i++) {
|
for (unsigned i = 0; i < instr->num_components; i++) {
|
||||||
bld.MOV(offset(dest, bld, i),
|
bld.MOV(offset(dest, bld, i),
|
||||||
@@ -3170,7 +3184,7 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dest,
|
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dest,
|
||||||
patch_handle);
|
srcs, ARRAY_SIZE(srcs));
|
||||||
inst->size_written = instr->num_components * REG_SIZE;
|
inst->size_written = instr->num_components * REG_SIZE;
|
||||||
}
|
}
|
||||||
inst->mlen = 1;
|
inst->mlen = 1;
|
||||||
@@ -3184,26 +3198,25 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
|
|||||||
* two double components.
|
* two double components.
|
||||||
*/
|
*/
|
||||||
unsigned num_components = instr->num_components;
|
unsigned num_components = instr->num_components;
|
||||||
const fs_reg srcs[] = {
|
|
||||||
retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
indirect_offset
|
srcs[URB_LOGICAL_SRC_HANDLE] =
|
||||||
};
|
retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD);
|
||||||
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset;
|
||||||
bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
|
|
||||||
|
|
||||||
if (first_component != 0) {
|
if (first_component != 0) {
|
||||||
unsigned read_components =
|
unsigned read_components =
|
||||||
num_components + first_component;
|
num_components + first_component;
|
||||||
fs_reg tmp = bld.vgrf(dest.type, read_components);
|
fs_reg tmp = bld.vgrf(dest.type, read_components);
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
|
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
|
||||||
payload);
|
srcs, ARRAY_SIZE(srcs));
|
||||||
for (unsigned i = 0; i < num_components; i++) {
|
for (unsigned i = 0; i < num_components; i++) {
|
||||||
bld.MOV(offset(dest, bld, i),
|
bld.MOV(offset(dest, bld, i),
|
||||||
offset(tmp, bld, i + first_component));
|
offset(tmp, bld, i + first_component));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dest,
|
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dest,
|
||||||
payload);
|
srcs, ARRAY_SIZE(srcs));
|
||||||
}
|
}
|
||||||
inst->mlen = 2;
|
inst->mlen = 2;
|
||||||
inst->offset = imm_offset;
|
inst->offset = imm_offset;
|
||||||
|
@@ -36,13 +36,24 @@ lower_urb_read_logical_send(const fs_builder &bld, fs_inst *inst,
|
|||||||
{
|
{
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
|
||||||
|
|
||||||
assert(inst->size_written % REG_SIZE == 0);
|
assert(inst->size_written % REG_SIZE == 0);
|
||||||
assert(inst->src[0].type == BRW_REGISTER_TYPE_UD);
|
assert(inst->header_size == 0);
|
||||||
assert(inst->src[0].file == FIXED_GRF || inst->src[0].file == VGRF);
|
|
||||||
|
fs_reg *payload_sources = new fs_reg[inst->mlen];
|
||||||
|
fs_reg payload = fs_reg(VGRF, bld.shader->alloc.allocate(inst->mlen),
|
||||||
|
BRW_REGISTER_TYPE_F);
|
||||||
|
|
||||||
|
unsigned header_size = 0;
|
||||||
|
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_HANDLE];
|
||||||
|
if (per_slot_present)
|
||||||
|
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
|
||||||
|
|
||||||
|
bld.LOAD_PAYLOAD(payload, payload_sources, inst->mlen, header_size);
|
||||||
|
|
||||||
|
delete [] payload_sources;
|
||||||
|
|
||||||
inst->opcode = SHADER_OPCODE_SEND;
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
inst->header_size = 1;
|
inst->header_size = header_size;
|
||||||
|
|
||||||
inst->sfid = BRW_SFID_URB;
|
inst->sfid = BRW_SFID_URB;
|
||||||
inst->desc = brw_urb_desc(devinfo,
|
inst->desc = brw_urb_desc(devinfo,
|
||||||
@@ -55,13 +66,11 @@ lower_urb_read_logical_send(const fs_builder &bld, fs_inst *inst,
|
|||||||
inst->ex_mlen = 0;
|
inst->ex_mlen = 0;
|
||||||
inst->send_is_volatile = true;
|
inst->send_is_volatile = true;
|
||||||
|
|
||||||
fs_reg tmp = inst->src[0];
|
|
||||||
|
|
||||||
inst->resize_sources(4);
|
inst->resize_sources(4);
|
||||||
|
|
||||||
inst->src[0] = brw_imm_ud(0); /* desc */
|
inst->src[0] = brw_imm_ud(0); /* desc */
|
||||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||||
inst->src[2] = tmp;
|
inst->src[2] = payload;
|
||||||
inst->src[3] = brw_null_reg();
|
inst->src[3] = brw_null_reg();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1037,8 +1037,11 @@ emit_urb_direct_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
|
|||||||
|
|
||||||
fs_builder ubld8 = bld.group(8, 0).exec_all();
|
fs_builder ubld8 = bld.group(8, 0).exec_all();
|
||||||
fs_reg data = ubld8.vgrf(BRW_REGISTER_TYPE_UD, num_regs);
|
fs_reg data = ubld8.vgrf(BRW_REGISTER_TYPE_UD, num_regs);
|
||||||
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
|
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
||||||
|
|
||||||
fs_inst *inst = ubld8.emit(SHADER_OPCODE_URB_READ_LOGICAL, data, urb_handle);
|
fs_inst *inst = ubld8.emit(SHADER_OPCODE_URB_READ_LOGICAL, data,
|
||||||
|
srcs, ARRAY_SIZE(srcs));
|
||||||
inst->mlen = 1;
|
inst->mlen = 1;
|
||||||
inst->offset = urb_global_offset;
|
inst->offset = urb_global_offset;
|
||||||
assert(inst->offset < 2048);
|
assert(inst->offset < 2048);
|
||||||
@@ -1093,17 +1096,14 @@ emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
|
|||||||
|
|
||||||
bld8.SHR(off, off, brw_imm_ud(2));
|
bld8.SHR(off, off, brw_imm_ud(2));
|
||||||
|
|
||||||
fs_reg payload_srcs[2];
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
payload_srcs[0] = urb_handle;
|
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
||||||
payload_srcs[1] = off;
|
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off;
|
||||||
|
|
||||||
fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
|
||||||
bld8.LOAD_PAYLOAD(payload, payload_srcs, 2, 2);
|
|
||||||
|
|
||||||
fs_reg data = bld8.vgrf(BRW_REGISTER_TYPE_UD, 4);
|
fs_reg data = bld8.vgrf(BRW_REGISTER_TYPE_UD, 4);
|
||||||
|
|
||||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL,
|
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL,
|
||||||
data, payload);
|
data, srcs, ARRAY_SIZE(srcs));
|
||||||
inst->mlen = 2;
|
inst->mlen = 2;
|
||||||
inst->offset = 0;
|
inst->offset = 0;
|
||||||
inst->size_written = 4 * REG_SIZE;
|
inst->size_written = 4 * REG_SIZE;
|
||||||
|
Reference in New Issue
Block a user