aco: fix p_extract_vector optimization in presence of unequally sized vector operands

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4506>
This commit is contained in:
Daniel Schürmann
2020-04-10 11:52:13 +01:00
committed by Marge Bot
parent 0e4432bfba
commit 28d36d26c2

View File

@@ -954,33 +954,38 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
case aco_opcode::p_extract_vector: { /* mov */
if (!ctx.info[instr->operands[0].tempId()].is_vec())
break;
/* check if we index directly into a vector element */
Instruction* vec = ctx.info[instr->operands[0].tempId()].instr;
if (vec->definitions[0].getTemp().size() == vec->operands.size() && /* TODO: what about 64bit or other combinations? */
vec->operands[0].size() == instr->definitions[0].size()) {
const unsigned index = instr->operands[1].constantValue();
const unsigned dst_offset = index * instr->definitions[0].bytes();
unsigned offset = 0;
/* convert this extract into a mov instruction */
Operand vec_op = vec->operands[instr->operands[1].constantValue()];
bool is_vgpr = instr->definitions[0].getTemp().type() == RegType::vgpr;
aco_opcode opcode = is_vgpr ? aco_opcode::v_mov_b32 : aco_opcode::s_mov_b32;
Format format = is_vgpr ? Format::VOP1 : Format::SOP1;
instr->opcode = opcode;
instr->format = format;
while (instr->operands.size() > 1)
instr->operands.pop_back();
instr->operands[0] = vec_op;
if (vec_op.isConstant()) {
if (vec_op.isLiteral())
ctx.info[instr->definitions[0].tempId()].set_literal(vec_op.constantValue());
else if (vec_op.size() == 1)
ctx.info[instr->definitions[0].tempId()].set_constant(vec_op.constantValue());
else if (vec_op.size() == 2)
ctx.info[instr->definitions[0].tempId()].set_constant_64bit(vec_op.constantValue());
} else {
assert(vec_op.isTemp());
ctx.info[instr->definitions[0].tempId()].set_temp(vec_op.getTemp());
for (const Operand& op : vec->operands) {
if (offset < dst_offset) {
offset += op.bytes();
continue;
} else if (offset != dst_offset || op.bytes() != instr->definitions[0].bytes()) {
break;
}
/* convert this extract into a copy instruction */
instr->opcode = aco_opcode::p_parallelcopy;
instr->operands.pop_back();
instr->operands[0] = op;
if (op.isConstant()) {
if (op.isLiteral())
ctx.info[instr->definitions[0].tempId()].set_literal(op.constantValue());
else if (op.size() == 1)
ctx.info[instr->definitions[0].tempId()].set_constant(op.constantValue());
else if (op.size() == 2)
ctx.info[instr->definitions[0].tempId()].set_constant_64bit(op.constantValue());
} else {
assert(op.isTemp());
ctx.info[instr->definitions[0].tempId()].set_temp(op.getTemp());
}
break;
}
break;
}