intel/fs: Emit code for Gen12-HP indirect compute data

Reworks:
 * Jordan: Apply to gen > 12
 * Jordan: Adjust comment about loading constants

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8342>
This commit is contained in:
Jason Ekstrand
2020-05-04 16:17:58 -05:00
committed by Jordan Justen
parent 4077ca1cc8
commit 369eab9420
2 changed files with 76 additions and 0 deletions

View File

@@ -1311,6 +1311,14 @@ enum brw_message_target {
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
#define GEN12_DATAPORT_OWORD_BLOCK_16_OWORDS 5
/* Translate an OWord count into the matching OWord block size encoding.
 *
 * Accepts n of 1, 2, 4, 8 or 16 and yields the corresponding
 * BRW_DATAPORT_OWORD_BLOCK_* / GEN12_DATAPORT_OWORD_BLOCK_* value.  Any other
 * count calls abort(); the trailing ~0 only gives that arm of the conditional
 * a value so the whole macro remains usable as an expression.
 *
 * n appears in every comparison, so it may be evaluated more than once; pass
 * an argument without side effects.
 */
#define BRW_DATAPORT_OWORD_BLOCK_OWORDS(n) \
((n) == 1 ? BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW : \
(n) == 2 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS : \
(n) == 4 ? BRW_DATAPORT_OWORD_BLOCK_4_OWORDS : \
(n) == 8 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS : \
(n) == 16 ? GEN12_DATAPORT_OWORD_BLOCK_16_OWORDS : \
(abort(), ~0))
#define BRW_DATAPORT_OWORD_BLOCK_DWORDS(n) \
((n) == 4 ? BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW : \
(n) == 8 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS : \

View File

@@ -1621,6 +1621,74 @@ fs_visitor::assign_curb_setup()
uint64_t used = 0;
if (stage == MESA_SHADER_COMPUTE &&
(devinfo->gen > 12 || gen_device_info_is_12hp(devinfo))) {
fs_builder ubld = bld.exec_all().group(8, 0).at(
cfg->first_block(), cfg->first_block()->start());
/* The base address for our push data is passed in as R0.0[31:6]. We
* have to mask off the bottom 6 bits.
*/
fs_reg base_addr = ubld.vgrf(BRW_REGISTER_TYPE_UD);
ubld.group(1, 0).AND(base_addr,
retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
brw_imm_ud(0xffffffc0));
fs_reg header0 = ubld.vgrf(BRW_REGISTER_TYPE_UD);
ubld.MOV(header0, brw_imm_ud(0));
ubld.group(1, 0).SHR(component(header0, 2), base_addr, brw_imm_ud(4));
/* On Gen12-HP we load constants at the start of the program using A32
* stateless messages.
*/
for (unsigned i = 0; i < uniform_push_length;) {
unsigned num_regs = MIN2(uniform_push_length - i, 8);
assert(num_regs > 0);
num_regs = 1 << util_logbase2(num_regs);
fs_reg header;
if (i == 0) {
header = header0;
} else {
header = ubld.vgrf(BRW_REGISTER_TYPE_UD);
ubld.MOV(header, brw_imm_ud(0));
ubld.group(1, 0).ADD(component(header, 2),
component(header0, 2),
brw_imm_ud(i * 2));
}
fs_reg srcs[4] = {
brw_imm_ud(0), /* desc */
brw_imm_ud(0), /* ex_desc */
header, /* payload */
fs_reg(), /* payload2 */
};
fs_reg dest = retype(brw_vec8_grf(payload.num_regs + i, 0),
BRW_REGISTER_TYPE_UD);
/* This instruction has to be run SIMD16 if we're filling more than a
* single register.
*/
unsigned send_width = MIN2(16, num_regs * 8);
fs_inst *send = ubld.group(send_width, 0).emit(SHADER_OPCODE_SEND,
dest, srcs, 4);
send->sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
send->desc = brw_dp_desc(devinfo, GEN8_BTI_STATELESS_NON_COHERENT,
GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
BRW_DATAPORT_OWORD_BLOCK_OWORDS(num_regs * 2));
send->header_size = 1;
send->mlen = 1;
send->size_written = num_regs * REG_SIZE;
send->send_is_volatile = true;
i += num_regs;
}
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
}
/* Map the offsets in the UNIFORM file to fixed HW regs. */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (unsigned int i = 0; i < inst->sources; i++) {