aco: use v_cvt_pk_u8_f32 for f2u8
Foz-DB Navi31:
Totals from 42 (0.05% of 79395) affected shaders:
Instrs: 3253747 -> 3248867 (-0.15%); split: -0.15%, +0.00%
CodeSize: 16690136 -> 16661772 (-0.17%); split: -0.17%, +0.00%
VGPRs: 4176 -> 4128 (-1.15%)
Latency: 18485157 -> 18479752 (-0.03%); split: -0.03%, +0.00%
InvThroughput: 3659404 -> 3658222 (-0.03%); split: -0.03%, +0.00%
Copies: 231891 -> 228145 (-1.62%)
VALU: 1785800 -> 1782054 (-0.21%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29532>
This commit is contained in:
@@ -3264,7 +3264,11 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
}
|
||||
}
|
||||
} else if (instr->src[0].src.ssa->bit_size == 32) {
|
||||
emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f32, dst);
|
||||
if (dst.regClass() == v1b && ctx->program->gfx_level >= GFX11)
|
||||
bld.vop3(aco_opcode::p_v_cvt_pk_u8_f32, Definition(dst),
|
||||
get_alu_src(ctx, instr->src[0]));
|
||||
else
|
||||
emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f32, dst);
|
||||
} else {
|
||||
emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f64, dst);
|
||||
}
|
||||
|
@@ -433,7 +433,8 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp
|
||||
instr->opcode != aco_opcode::v_permlanex16_b32 &&
|
||||
instr->opcode != aco_opcode::v_permlane64_b32 &&
|
||||
instr->opcode != aco_opcode::v_readlane_b32_e64 &&
|
||||
instr->opcode != aco_opcode::v_writelane_b32_e64;
|
||||
instr->opcode != aco_opcode::v_writelane_b32_e64 &&
|
||||
instr->opcode != aco_opcode::p_v_cvt_pk_u8_f32;
|
||||
}
|
||||
|
||||
aco_ptr<Instruction>
|
||||
|
@@ -2990,6 +2990,14 @@ lower_to_hw_instr(Program* program)
|
||||
ctx.instructions.emplace_back(std::move(instr));
|
||||
|
||||
emit_set_mode(bld, block->fp_mode, set_round, false);
|
||||
} else if (instr->opcode == aco_opcode::p_v_cvt_pk_u8_f32) {
|
||||
Definition def = instr->definitions[0];
|
||||
VALU_instruction& valu =
|
||||
bld.vop3(aco_opcode::v_cvt_pk_u8_f32, def, instr->operands[0],
|
||||
Operand::c32(def.physReg().byte()), Operand(def.physReg(), v1))
|
||||
->valu();
|
||||
valu.abs = instr->valu().abs;
|
||||
valu.neg = instr->valu().neg;
|
||||
} else if (instr->isMIMG() && instr->mimg().strict_wqm) {
|
||||
lower_image_sample(&ctx, instr);
|
||||
ctx.instructions.emplace_back(std::move(instr));
|
||||
|
@@ -1283,6 +1283,7 @@ VOP3 = {
|
||||
("v_sad_u16", False, False, dst(1), src(1, 1, 1), op(0x15c, gfx8=0x1db, gfx10=0x15c, gfx11=0x224)),
|
||||
("v_sad_u32", False, False, dst(1), src(1, 1, 1), op(0x15d, gfx8=0x1dc, gfx10=0x15d, gfx11=0x225)),
|
||||
("v_cvt_pk_u8_f32", True, False, dst(1), src(1, 1, 1), op(0x15e, gfx8=0x1dd, gfx10=0x15e, gfx11=0x226)),
|
||||
("p_v_cvt_pk_u8_f32", True, False, dst(1), src(1), op(-1)),
|
||||
("v_div_fixup_f32", True, True, dst(1), src(1, 1, 1), op(0x15f, gfx8=0x1de, gfx10=0x15f, gfx11=0x227)),
|
||||
("v_div_fixup_f64", True, True, dst(2), src(2, 2, 2), op(0x160, gfx8=0x1df, gfx10=0x160, gfx11=0x228)),
|
||||
("v_lshl_b64", False, False, dst(2), src(2, 1), op(0x161, gfx8=-1), InstrClass.Valu64),
|
||||
|
@@ -622,7 +622,7 @@ get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr
|
||||
if (instr->isVALU()) {
|
||||
assert(rc.bytes() <= 2);
|
||||
|
||||
if (can_use_SDWA(gfx_level, instr, false))
|
||||
if (can_use_SDWA(gfx_level, instr, false) || instr->opcode == aco_opcode::p_v_cvt_pk_u8_f32)
|
||||
return std::make_pair(rc.bytes(), rc.bytes());
|
||||
|
||||
unsigned bytes_written = 4u;
|
||||
@@ -693,6 +693,9 @@ add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg r
|
||||
amd_gfx_level gfx_level = program->gfx_level;
|
||||
assert(instr->definitions[0].bytes() <= 2);
|
||||
|
||||
if (instr->opcode == aco_opcode::p_v_cvt_pk_u8_f32)
|
||||
return;
|
||||
|
||||
if (reg.byte() == 0 && allow_16bit_write && instr_is_16bit(gfx_level, instr->opcode))
|
||||
return;
|
||||
|
||||
|
@@ -1253,6 +1253,7 @@ validate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>
|
||||
case aco_opcode::global_load_short_d16_hi:
|
||||
case aco_opcode::ds_read_u8_d16_hi:
|
||||
case aco_opcode::ds_read_u16_d16_hi: return byte == 2;
|
||||
case aco_opcode::p_v_cvt_pk_u8_f32: return true;
|
||||
default: break;
|
||||
}
|
||||
|
||||
@@ -1269,6 +1270,9 @@ get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr,
|
||||
return gfx_level >= GFX8 ? def.bytes() : def.size() * 4u;
|
||||
if (instr->isVALU() || instr->isVINTRP()) {
|
||||
assert(def.bytes() <= 2);
|
||||
if (instr->opcode == aco_opcode::p_v_cvt_pk_u8_f32)
|
||||
return 1;
|
||||
|
||||
if (instr->isSDWA())
|
||||
return instr->sdwa().dst_sel.size();
|
||||
|
||||
|
Reference in New Issue
Block a user