aco: Use s_pack_ll for s_bfe operand on GFX9+.
Foz-DB Navi21:
Totals from 1 (0.00% of 134913) affected shaders:
CodeSize: 340 -> 336 (-1.18%)
Instrs: 77 -> 76 (-1.30%)
Latency: 1065 -> 1063 (-0.19%)
InvThroughput: 4260 -> 4252 (-0.19%)

Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18936>
This commit is contained in:
@@ -3686,17 +3686,20 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||||||
|
|
||||||
nir_const_value* const_offset = nir_src_as_const_value(instr->src[1].src);
|
nir_const_value* const_offset = nir_src_as_const_value(instr->src[1].src);
|
||||||
nir_const_value* const_bits = nir_src_as_const_value(instr->src[2].src);
|
nir_const_value* const_bits = nir_src_as_const_value(instr->src[2].src);
|
||||||
|
aco_opcode opcode =
|
||||||
|
instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32;
|
||||||
if (const_offset && const_bits) {
|
if (const_offset && const_bits) {
|
||||||
uint32_t extract = (const_bits->u32 << 16) | (const_offset->u32 & 0x1f);
|
uint32_t extract = (const_bits->u32 << 16) | (const_offset->u32 & 0x1f);
|
||||||
aco_opcode opcode =
|
|
||||||
instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32;
|
|
||||||
bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand::c32(extract));
|
bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand::c32(extract));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
Temp offset = get_alu_src(ctx, instr->src[1]);
|
Temp offset = get_alu_src(ctx, instr->src[1]);
|
||||||
Temp bits = get_alu_src(ctx, instr->src[2]);
|
Temp bits = get_alu_src(ctx, instr->src[2]);
|
||||||
if (instr->op == nir_op_ubfe) {
|
if (ctx->program->gfx_level >= GFX9) {
|
||||||
|
Temp extract = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), offset, bits);
|
||||||
|
bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, extract);
|
||||||
|
} else if (instr->op == nir_op_ubfe) {
|
||||||
Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset);
|
Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset);
|
||||||
Temp masked =
|
Temp masked =
|
||||||
bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask);
|
bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask);
|
||||||
|
Reference in New Issue
Block a user