diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 7bb24cdb581..119458e9ca7 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3686,17 +3686,20 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) nir_const_value* const_offset = nir_src_as_const_value(instr->src[1].src); nir_const_value* const_bits = nir_src_as_const_value(instr->src[2].src); + aco_opcode opcode = + instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32; if (const_offset && const_bits) { uint32_t extract = (const_bits->u32 << 16) | (const_offset->u32 & 0x1f); - aco_opcode opcode = - instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32; bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand::c32(extract)); break; } Temp offset = get_alu_src(ctx, instr->src[1]); Temp bits = get_alu_src(ctx, instr->src[2]); - if (instr->op == nir_op_ubfe) { + if (ctx->program->gfx_level >= GFX9) { + Temp extract = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), offset, bits); + bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, extract); + } else if (instr->op == nir_op_ubfe) { Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset); Temp masked = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask);