radv,aco: lower_pack_half_2x16
This patch also optimizes pack_half_2x16(a, 0.0).

Totals from 1949 (1.43% of 136546) affected shaders (RAVEN):
SGPRs: 83376 -> 83336 (-0.05%)
CodeSize: 3532144 -> 3512352 (-0.56%)
Instrs: 660746 -> 660682 (-0.01%); split: -0.01%, +0.00%
Cycles: 6780716 -> 6780472 (-0.00%); split: -0.00%, +0.00%
VMEM: 990886 -> 990883 (-0.00%); split: +0.00%, -0.00%
SMEM: 150506 -> 150538 (+0.02%); split: +0.05%, -0.03%
SClause: 30595 -> 30594 (-0.00%); split: -0.01%, +0.00%
Copies: 40801 -> 40729 (-0.18%)
PreSGPRs: 52335 -> 52341 (+0.01%); split: -0.03%, +0.04%
PreVGPRs: 45104 -> 45097 (-0.02%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6777>
This commit is contained in:

committed by
Marge Bot

parent
dae1e6f756
commit
2f125908b3
@@ -2608,18 +2608,17 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_pack_half_2x16: {
|
||||
Temp src = get_alu_src(ctx, instr->src[0], 2);
|
||||
|
||||
case nir_op_pack_half_2x16_split: {
|
||||
if (dst.regClass() == v1) {
|
||||
Temp src0 = bld.tmp(v1);
|
||||
Temp src1 = bld.tmp(v1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(src0), Definition(src1), src);
|
||||
if (!ctx->block->fp_mode.care_about_round16_64 || ctx->block->fp_mode.round16_64 == fp_round_tz) {
|
||||
bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src0, src1);
|
||||
nir_const_value* val = nir_src_as_const_value(instr->src[1].src);
|
||||
if (val && val->u32 == 0 && ctx->program->chip_class <= GFX9) {
|
||||
/* upper bits zero on GFX6-GFX9 */
|
||||
bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), get_alu_src(ctx, instr->src[0]));
|
||||
} else if (!ctx->block->fp_mode.care_about_round16_64 || ctx->block->fp_mode.round16_64 == fp_round_tz) {
|
||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32, dst);
|
||||
} else {
|
||||
src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src0);
|
||||
src1 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src1);
|
||||
Temp src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), get_alu_src(ctx, instr->src[0]));
|
||||
Temp src1 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), get_alu_src(ctx, instr->src[1]));
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src0, src1);
|
||||
}
|
||||
} else {
|
||||
|
@@ -837,7 +837,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
|
||||
case nir_op_i2f16:
|
||||
case nir_op_i2f32:
|
||||
case nir_op_i2f64:
|
||||
case nir_op_pack_half_2x16:
|
||||
case nir_op_pack_half_2x16_split:
|
||||
case nir_op_unpack_half_2x16_split_x:
|
||||
case nir_op_unpack_half_2x16_split_y:
|
||||
case nir_op_fddx:
|
||||
|
@@ -60,6 +60,7 @@ static const struct nir_shader_compiler_options nir_options = {
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_pack_unorm_2x16 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
.lower_pack_half_2x16 = true,
|
||||
.lower_unpack_snorm_2x16 = true,
|
||||
.lower_unpack_snorm_4x8 = true,
|
||||
.lower_unpack_unorm_2x16 = true,
|
||||
|
Reference in New Issue
Block a user