From 2b15a90cc352fc16f257f66782db0c0f5470c0fe Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Sat, 10 Feb 2024 19:33:46 -0800 Subject: [PATCH] intel/elk: Remove unused sources from ELK_SHADER_OPCODE_SEND Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/compiler/elk/elk_eu_opcodes.h | 5 +- src/intel/compiler/elk/elk_fs.cpp | 11 ++-- .../compiler/elk/elk_fs_copy_propagation.cpp | 6 +- src/intel/compiler/elk/elk_fs_generator.cpp | 2 +- .../compiler/elk/elk_fs_reg_allocate.cpp | 5 +- src/intel/compiler/elk/elk_fs_validate.cpp | 2 +- src/intel/compiler/elk/elk_ir_performance.cpp | 12 +--- .../compiler/elk/elk_lower_logical_sends.cpp | 65 +++++++------------ 8 files changed, 39 insertions(+), 69 deletions(-) diff --git a/src/intel/compiler/elk/elk_eu_opcodes.h b/src/intel/compiler/elk/elk_eu_opcodes.h index 0b097d8175a..fbdd5ebc3bd 100644 --- a/src/intel/compiler/elk/elk_eu_opcodes.h +++ b/src/intel/compiler/elk/elk_eu_opcodes.h @@ -119,9 +119,8 @@ enum elk_opcode { ELK_SHADER_OPCODE_COS, /** - * A generic "send" opcode. The first two sources are the message - * descriptor and extended message descriptor respectively. The third - * and optional fourth sources are the message payload + * A generic "send" opcode. The first source is the descriptor and + * the second source is the message payload. */ ELK_SHADER_OPCODE_SEND, diff --git a/src/intel/compiler/elk/elk_fs.cpp b/src/intel/compiler/elk/elk_fs.cpp index 1ffa8c9422d..33d26fc7267 100644 --- a/src/intel/compiler/elk/elk_fs.cpp +++ b/src/intel/compiler/elk/elk_fs.cpp @@ -280,7 +280,7 @@ elk_fs_inst::is_control_source(unsigned arg) const return arg == 1 || arg == 2; case ELK_SHADER_OPCODE_SEND: - return arg == 0 || arg == 1; + return arg == 0; default: return false; @@ -320,7 +320,7 @@ elk_fs_inst::is_payload(unsigned arg) const return arg == 0; case ELK_SHADER_OPCODE_SEND: - return arg == 2 || arg == 3; + return arg == 1; default: return false; @@ -848,7 +848,7 @@ elk_fs_inst::size_read(int arg) const { switch (opcode) { case ELK_SHADER_OPCODE_SEND: - if (arg == 2) { + if (arg == 1) { return mlen * REG_SIZE; } break; @@ -3118,11 +3118,10 @@ elk_fs_visitor::emit_repclear_shader() if (devinfo->ver >= 7) { write = bld.emit(ELK_SHADER_OPCODE_SEND); - write->resize_sources(3); + write->resize_sources(2); write->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE; write->src[0] = elk_imm_ud(0); - write->src[1] = elk_imm_ud(0); - write->src[2] = i == 0 ? color_output : header; + write->src[1] = i == 0 ? color_output : header; write->check_tdr = true; write->send_has_side_effects = true; write->desc = elk_fb_write_desc(devinfo, i, diff --git a/src/intel/compiler/elk/elk_fs_copy_propagation.cpp b/src/intel/compiler/elk/elk_fs_copy_propagation.cpp index 498222473c1..0743e7b288b 100644 --- a/src/intel/compiler/elk/elk_fs_copy_propagation.cpp +++ b/src/intel/compiler/elk/elk_fs_copy_propagation.cpp @@ -715,12 +715,8 @@ try_copy_propagate(const elk_compiler *compiler, elk_fs_inst *inst, * allow this if the registers aren't too large. */ if (inst->opcode == ELK_SHADER_OPCODE_SEND && entry->src.file == VGRF) { - int other_src = arg == 2 ? 3 : 2; - unsigned other_size = inst->src[other_src].file == VGRF ? - alloc.sizes[inst->src[other_src].nr] : - inst->size_read(other_src); unsigned prop_src_size = alloc.sizes[entry->src.nr]; - if (other_size + prop_src_size > 15) + if (prop_src_size > 15) return false; } } diff --git a/src/intel/compiler/elk/elk_fs_generator.cpp b/src/intel/compiler/elk/elk_fs_generator.cpp index c59745b085b..4cd55e32bdd 100644 --- a/src/intel/compiler/elk/elk_fs_generator.cpp +++ b/src/intel/compiler/elk/elk_fs_generator.cpp @@ -1950,7 +1950,7 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, break; case ELK_SHADER_OPCODE_SEND: - generate_send(inst, dst, src[0], src[2]); + generate_send(inst, dst, src[0], src[1]); send_count++; break; diff --git a/src/intel/compiler/elk/elk_fs_reg_allocate.cpp b/src/intel/compiler/elk/elk_fs_reg_allocate.cpp index ca948da887c..0dc99cb4c23 100644 --- a/src/intel/compiler/elk/elk_fs_reg_allocate.cpp +++ b/src/intel/compiler/elk/elk_fs_reg_allocate.cpp @@ -615,7 +615,7 @@ elk_fs_reg_alloc::setup_inst_interference(const elk_fs_inst *inst) */ if (inst->eot) { const int vgrf = inst->opcode == ELK_SHADER_OPCODE_SEND ? - inst->src[2].nr : inst->src[0].nr; + inst->src[1].nr : inst->src[0].nr; const int size = DIV_ROUND_UP(fs->alloc.sizes[vgrf], reg_unit(devinfo)); int reg = ELK_MAX_GRF - size; @@ -811,9 +811,8 @@ elk_fs_reg_alloc::emit_unspill(const fs_builder &bld, _mesa_set_add(spill_insts, unspill_inst); const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT; - const elk_fs_reg ex_desc = elk_imm_ud(0); - elk_fs_reg srcs[] = { elk_imm_ud(0), ex_desc, header }; + elk_fs_reg srcs[] = { elk_imm_ud(0), header }; unspill_inst = bld.emit(ELK_SHADER_OPCODE_SEND, dst, srcs, ARRAY_SIZE(srcs)); unspill_inst->mlen = 1; diff --git a/src/intel/compiler/elk/elk_fs_validate.cpp b/src/intel/compiler/elk/elk_fs_validate.cpp index e84daddb3da..d2bb6df41bc 100644 --- a/src/intel/compiler/elk/elk_fs_validate.cpp +++ b/src/intel/compiler/elk/elk_fs_validate.cpp @@ -95,7 +95,7 @@ elk_fs_visitor::validate() foreach_block_and_inst (block, elk_fs_inst, inst, cfg) { switch (inst->opcode) { case ELK_SHADER_OPCODE_SEND: - fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1])); + fsv_assert(is_uniform(inst->src[0])); break; case ELK_OPCODE_MOV: diff --git a/src/intel/compiler/elk/elk_ir_performance.cpp b/src/intel/compiler/elk/elk_ir_performance.cpp index dcef5697c75..9753aa59777 100644 --- a/src/intel/compiler/elk/elk_ir_performance.cpp +++ b/src/intel/compiler/elk/elk_ir_performance.cpp @@ -127,16 +127,8 @@ namespace { sc(elk_has_bank_conflict(isa, inst) ? sd : 0), desc(inst->desc), sfid(inst->sfid) { - /* We typically want the maximum source size, except for split send - * messages which require the total size. - */ - if (inst->opcode == ELK_SHADER_OPCODE_SEND) { - ss = DIV_ROUND_UP(inst->size_read(2), REG_SIZE) + - DIV_ROUND_UP(inst->size_read(3), REG_SIZE); - } else { - for (unsigned i = 0; i < inst->sources; i++) - ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(i), REG_SIZE)); - } + for (unsigned i = 0; i < inst->sources; i++) + ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(i), REG_SIZE)); /* Convert the execution size to GRF units. */ sx = DIV_ROUND_UP(inst->exec_size * type_sz(tx), REG_SIZE); diff --git a/src/intel/compiler/elk/elk_lower_logical_sends.cpp b/src/intel/compiler/elk/elk_lower_logical_sends.cpp index 55499f1ccf5..587e2db6210 100644 --- a/src/intel/compiler/elk/elk_lower_logical_sends.cpp +++ b/src/intel/compiler/elk/elk_lower_logical_sends.cpp @@ -64,12 +64,10 @@ lower_urb_read_logical_send(const fs_builder &bld, elk_fs_inst *inst) inst->mlen = header_size; inst->send_is_volatile = true; - inst->resize_sources(4); + inst->resize_sources(2); inst->src[0] = elk_imm_ud(0); /* desc */ - inst->src[1] = elk_imm_ud(0); /* ex_desc */ - inst->src[2] = payload; - inst->src[3] = elk_null_reg(); + inst->src[1] = payload; } static void @@ -119,12 +117,10 @@ lower_urb_write_logical_send(const fs_builder &bld, elk_fs_inst *inst) inst->mlen = length; inst->send_has_side_effects = true; - inst->resize_sources(4); + inst->resize_sources(2); inst->src[0] = elk_imm_ud(0); /* desc */ - inst->src[1] = elk_imm_ud(0); /* ex_desc */ - inst->src[2] = payload; - inst->src[3] = elk_null_reg(); + inst->src[1] = payload; } static void @@ -377,11 +373,10 @@ lower_fb_write_logical_send(const fs_builder &bld, elk_fs_inst *inst, } inst->opcode = ELK_SHADER_OPCODE_SEND; - inst->resize_sources(3); + inst->resize_sources(2); inst->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE; inst->src[0] = desc; - inst->src[1] = elk_imm_ud(0); - inst->src[2] = payload; + inst->src[1] = payload; inst->mlen = regs_written(load); inst->header_size = header_size; inst->check_tdr = true; @@ -1102,7 +1097,6 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, elk_fs_inst *inst, elk_op simd_mode, 0 /* return_format unused on gfx7+ */); inst->src[0] = elk_imm_ud(0); - inst->src[1] = elk_imm_ud(0); } else { assert(surface_handle.file == BAD_FILE); @@ -1131,11 +1125,10 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, elk_fs_inst *inst, elk_op ubld.AND(desc, desc, elk_imm_ud(0xfff)); inst->src[0] = component(desc, 0); - inst->src[1] = elk_imm_ud(0); /* ex_desc */ } - inst->src[2] = src_payload; - inst->resize_sources(3); + inst->src[1] = src_payload; + inst->resize_sources(2); if (inst->eot) { /* EOT sampler messages don't make sense to split because it would @@ -1306,7 +1299,6 @@ setup_surface_descriptors(const fs_builder &bld, elk_fs_inst *inst, uint32_t des if (surface.file == IMM) { inst->desc = desc | (surface.ud & 0xff); inst->src[0] = elk_imm_ud(0); - inst->src[1] = elk_imm_ud(0); /* ex_desc */ } else { assert(surface_handle.file == BAD_FILE); @@ -1315,7 +1307,6 @@ setup_surface_descriptors(const fs_builder &bld, elk_fs_inst *inst, uint32_t des elk_fs_reg tmp = ubld.vgrf(ELK_REGISTER_TYPE_UD); ubld.AND(tmp, surface, elk_imm_ud(0xff)); inst->src[0] = component(tmp, 0); - inst->src[1] = elk_imm_ud(0); /* ex_desc */ } } @@ -1539,11 +1530,10 @@ lower_surface_logical_send(const fs_builder &bld, elk_fs_inst *inst) inst->sfid = sfid; setup_surface_descriptors(bld, inst, desc, surface, surface_handle); - inst->resize_sources(4); + inst->resize_sources(2); /* Finally, the payload */ - inst->src[2] = payload; - inst->src[3] = payload2; + inst->src[1] = payload; } static void @@ -1606,10 +1596,9 @@ lower_surface_block_logical_send(const fs_builder &bld, elk_fs_inst *inst) arg.ud, write); setup_surface_descriptors(bld, inst, desc, surface, surface_handle); - inst->resize_sources(4); + inst->resize_sources(2); - inst->src[2] = header; - inst->src[3] = data; + inst->src[1] = header; } static void @@ -1733,11 +1722,9 @@ lower_a64_logical_send(const fs_builder &bld, elk_fs_inst *inst) /* Set up SFID and descriptors */ inst->sfid = HSW_SFID_DATAPORT_DATA_CACHE_1; inst->desc = desc; - inst->resize_sources(4); + inst->resize_sources(2); inst->src[0] = elk_imm_ud(0); /* desc */ - inst->src[1] = elk_imm_ud(0); /* ex_desc */ - inst->src[2] = payload; - inst->src[3] = payload2; + inst->src[1] = payload; } static void @@ -1765,8 +1752,8 @@ lower_varying_pull_constant_logical_send(const fs_builder &bld, elk_fs_inst *ins inst->mlen = inst->exec_size / 8; inst->resize_sources(3); - /* src[0] & src[1] are filled by setup_surface_descriptors() */ - inst->src[2] = ubo_offset; /* payload */ + /* src[0] is filled by setup_surface_descriptors() */ + inst->src[1] = ubo_offset; /* payload */ if (compiler->indirect_ubos_use_sampler) { const unsigned simd_mode = @@ -1812,8 +1799,8 @@ lower_varying_pull_constant_logical_send(const fs_builder &bld, elk_fs_inst *ins bld.emit(*inst); /* Offset the source */ - inst->src[2] = bld.vgrf(ELK_REGISTER_TYPE_UD); - bld.ADD(inst->src[2], ubo_offset, elk_imm_ud(c * 4)); + inst->src[1] = bld.vgrf(ELK_REGISTER_TYPE_UD); + bld.ADD(inst->src[1], ubo_offset, elk_imm_ud(c * 4)); /* Offset the destination */ inst->dst = offset(inst->dst, bld, 1); @@ -1986,10 +1973,9 @@ lower_interpolator_logical_send(const fs_builder &bld, elk_fs_inst *inst, inst->send_has_side_effects = false; inst->send_is_volatile = false; - inst->resize_sources(3); + inst->resize_sources(2); inst->src[0] = component(desc, 0); - inst->src[1] = elk_imm_ud(0); /* ex_desc */ - inst->src[2] = payload; + inst->src[1] = payload; } static void @@ -2008,10 +1994,10 @@ lower_get_buffer_size(const fs_builder &bld, elk_fs_inst *inst) inst->opcode = ELK_SHADER_OPCODE_SEND; inst->mlen = inst->exec_size / 8; - inst->resize_sources(3); + inst->resize_sources(2); - /* src[0] & src[1] are filled by setup_surface_descriptors() */ - inst->src[2] = lod; + /* src[0] is filled by setup_surface_descriptors() */ + inst->src[1] = lod; const uint32_t return_format = devinfo->ver >= 8 ? GFX8_SAMPLER_RETURN_FORMAT_32BITS : ELK_SAMPLER_RETURN_FORMAT_SINT32; @@ -2247,12 +2233,11 @@ elk_fs_visitor::lower_uniform_pull_constant_loads() elk_dp_oword_block_rw_desc(devinfo, true /* align_16B */, size_B.ud / 4, false /* write */); - inst->resize_sources(4); + inst->resize_sources(2); setup_surface_descriptors(ubld, inst, desc, surface, surface_handle); - inst->src[2] = header; - inst->src[3] = elk_fs_reg(); /* unused for reads */ + inst->src[1] = header; invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES); } else {