diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 0841dc0fb46..a31f5855ed9 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -1671,9 +1671,13 @@ brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo, unsigned msg_type, bool noperspective, bool coarse_pixel_rate, - unsigned simd_mode, - unsigned slot_group) + unsigned exec_size, + unsigned group) { + assert(exec_size == 8 || exec_size == 16); + const bool simd_mode = exec_size == 16; + const bool slot_group = group >= 16; + assert(devinfo->ver >= 10 || !coarse_pixel_rate); return (SET_BITS(slot_group, 11, 11) | SET_BITS(msg_type, 13, 12) | diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index abca49894be..7e5caa21dbf 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3345,38 +3345,6 @@ brw_memory_fence(struct brw_codegen *p, brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti); } -void -brw_pixel_interpolator_query(struct brw_codegen *p, - struct brw_reg dest, - struct brw_reg mrf, - bool noperspective, - bool coarse_pixel_rate, - unsigned mode, - struct brw_reg data, - unsigned msg_length, - unsigned response_length) -{ - const struct intel_device_info *devinfo = p->devinfo; - const uint16_t exec_size = brw_get_default_exec_size(p); - const unsigned slot_group = brw_get_default_group(p) / 16; - const unsigned simd_mode = (exec_size == BRW_EXECUTE_16); - const unsigned desc = - brw_message_desc(devinfo, msg_length, response_length, false) | - brw_pixel_interp_desc(devinfo, mode, noperspective, coarse_pixel_rate, - simd_mode, slot_group); - - /* brw_send_indirect_message will automatically use a direct send message - * if data is actually immediate. - */ - brw_send_indirect_message(p, - GFX7_SFID_PIXEL_INTERPOLATOR, - dest, - mrf, - vec1(data), - desc, - false); -} - void brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, bool last) { diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index e09e0703d9b..44b9cfaec05 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -625,12 +625,6 @@ private: struct brw_reg dst, struct brw_reg index); - void generate_pixel_interpolator_query(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg msg_data, - unsigned msg_type); - void generate_set_sample_id(fs_inst *inst, struct brw_reg dst, struct brw_reg src0, diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 895816b4b9d..3b7fcc958e0 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1611,31 +1611,6 @@ fs_generator::generate_varying_pull_constant_load_gfx4(fs_inst *inst, msg_type, simd_mode, return_format)); } -void -fs_generator::generate_pixel_interpolator_query(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg msg_data, - unsigned msg_type) -{ - const bool has_payload = inst->src[0].file != BAD_FILE; - assert(msg_data.type == BRW_REGISTER_TYPE_UD); - assert(inst->size_written % REG_SIZE == 0); - - struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); - - brw_pixel_interpolator_query(p, - retype(dst, BRW_REGISTER_TYPE_UW), - /* If we don't have a payload, what we send doesn't matter */ - has_payload ? src : brw_vec8_grf(0, 0), - inst->pi_noperspective, - prog_data->per_coarse_pixel_dispatch, - msg_type, - msg_data, - has_payload ? 2 * inst->exec_size / 8 : 1, - inst->size_written / REG_SIZE); -} - /* Sets vstride=1, width=4, hstride=0 of register src1 during * the ADD instruction. */ @@ -2389,24 +2364,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, } break; - case FS_OPCODE_INTERPOLATE_AT_SAMPLE: - generate_pixel_interpolator_query(inst, dst, src[0], src[1], - GFX7_PIXEL_INTERPOLATOR_LOC_SAMPLE); - send_count++; - break; - - case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: - generate_pixel_interpolator_query(inst, dst, src[0], src[1], - GFX7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET); - send_count++; - break; - - case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: - generate_pixel_interpolator_query(inst, dst, src[0], src[1], - GFX7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET); - send_count++; - break; - case CS_OPCODE_CS_TERMINATE: generate_cs_terminate(inst, src[0]); send_count++; diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index 3c7f8e46f59..01a8a1a1e52 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -1107,6 +1107,13 @@ namespace { abort(); } + case GFX7_SFID_PIXEL_INTERPOLATOR: + if (devinfo->ver >= 7) + return calculate_desc(info, EU_UNIT_PI, 2, 0, 0, 14 /* XXX */, 0, + 0, 90 /* XXX */, 0, 0, 0, 0); + else + abort(); + case GFX12_SFID_UGM: case GFX12_SFID_TGM: case GFX12_SFID_SLM: diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 7bda05bd716..9a2b2f62fac 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -2446,6 +2446,61 @@ lower_math_logical_send(const fs_builder &bld, fs_inst *inst) } } +static void +lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst, + const struct brw_wm_prog_data *wm_prog_data) +{ + const intel_device_info *devinfo = bld.shader->devinfo; + + /* We have to send something */ + fs_reg payload = brw_vec8_grf(0, 0); + unsigned mlen = 1; + + const fs_reg desc = inst->src[1]; + + unsigned mode; + switch (inst->opcode) { + case FS_OPCODE_INTERPOLATE_AT_SAMPLE: + assert(inst->src[0].file == BAD_FILE); + mode = GFX7_PIXEL_INTERPOLATOR_LOC_SAMPLE; + break; + + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: + assert(inst->src[0].file == BAD_FILE); + mode = GFX7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET; + break; + + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: + payload = inst->src[0]; + mlen = 2 * inst->exec_size / 8; + mode = GFX7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET; + break; + + default: + unreachable("Invalid interpolator instruction"); + } + + uint32_t desc_imm = + brw_pixel_interp_desc(devinfo, mode, inst->pi_noperspective, + wm_prog_data->per_coarse_pixel_dispatch, + inst->exec_size, inst->group); + + assert(bld.shader->devinfo->ver >= 7); + inst->opcode = SHADER_OPCODE_SEND; + inst->sfid = GFX7_SFID_PIXEL_INTERPOLATOR; + inst->desc = desc_imm; + inst->ex_desc = 0; + inst->mlen = mlen; + inst->ex_mlen = 0; + inst->send_has_side_effects = false; + inst->send_is_volatile = false; + + inst->resize_sources(3); + inst->src[0] = desc; + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + inst->src[2] = payload; +} + static void lower_btd_logical_send(const fs_builder &bld, fs_inst *inst) { @@ -2749,6 +2804,13 @@ fs_visitor::lower_logical_sends() continue; } + case FS_OPCODE_INTERPOLATE_AT_SAMPLE: + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: + lower_interpolator_logical_send(ibld, inst, + brw_wm_prog_data(prog_data)); + break; + case SHADER_OPCODE_BTD_SPAWN_LOGICAL: case SHADER_OPCODE_BTD_RETIRE_LOGICAL: lower_btd_logical_send(ibld, inst); diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 26e30a63534..7a35b931b45 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -532,6 +532,10 @@ schedule_node::set_latency_gfx7(bool is_haswell) } break; + case GFX7_SFID_PIXEL_INTERPOLATOR: + latency = 50; /* TODO */ + break; + case GFX12_SFID_UGM: case GFX12_SFID_TGM: case GFX12_SFID_SLM: