From cc06b7e00d8b5b5201c01d6efbfd204641e9d768 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Mon, 3 Oct 2022 11:18:40 +0200 Subject: [PATCH] aco: Use s_pack_ll for s_bfe operand on GFX9+. Foz-DB Navi21: Totals from 1 (0.00% of 134913) affected shaders: CodeSize: 340 -> 336 (-1.18%) Instrs: 77 -> 76 (-1.30%) Latency: 1065 -> 1063 (-0.19%) InvThroughput: 4260 -> 4252 (-0.19%) Signed-off-by: Georg Lehmann Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 7bb24cdb581..119458e9ca7 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3686,17 +3686,20 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) nir_const_value* const_offset = nir_src_as_const_value(instr->src[1].src); nir_const_value* const_bits = nir_src_as_const_value(instr->src[2].src); + aco_opcode opcode = + instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32; if (const_offset && const_bits) { uint32_t extract = (const_bits->u32 << 16) | (const_offset->u32 & 0x1f); - aco_opcode opcode = - instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32; bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand::c32(extract)); break; } Temp offset = get_alu_src(ctx, instr->src[1]); Temp bits = get_alu_src(ctx, instr->src[2]); - if (instr->op == nir_op_ubfe) { + if (ctx->program->gfx_level >= GFX9) { + Temp extract = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), offset, bits); + bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, extract); + } else if (instr->op == nir_op_ubfe) { Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset); Temp masked = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask);