ir3: Make MOVMSK use repeat
MOVMSK is a bit of a special case because it takes multiple cycles (and therefore reduces the nops needed if it's between some other assigner and consumer); however, weird things happen if you try to start reading the first component while it isn't finished yet. On balance, making it use repeat seems to result in fewer special cases. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6752>
This commit is contained in:
@@ -1677,6 +1677,7 @@ ir3_MOVMSK(struct ir3_block *block, unsigned components)
|
||||
struct ir3_register *dst = __ssa_dst(instr);
|
||||
dst->flags |= IR3_REG_SHARED;
|
||||
dst->wrmask = (1 << components) - 1;
|
||||
instr->repeat = components - 1;
|
||||
return instr;
|
||||
}
|
||||
|
||||
|
@@ -247,6 +247,12 @@ delay_calc_srcn_postra(struct ir3_instruction *assigner, struct ir3_instruction
|
||||
if ((src->flags & IR3_REG_RELATIV) || (dst->flags & IR3_REG_RELATIV))
|
||||
return delay;
|
||||
|
||||
/* MOVMSK seems to require that all users wait until the entire
|
||||
* instruction is finished, so just bail here.
|
||||
*/
|
||||
if (assigner->opc == OPC_MOVMSK)
|
||||
return delay;
|
||||
|
||||
/* TODO: Handle the combination of (rpt) and different component sizes
|
||||
* better like below. This complicates things significantly because the
|
||||
* components don't line up.
|
||||
|
@@ -800,6 +800,7 @@ cat1_movmsk: T_OP_MOVMSK '.' T_W {
|
||||
new_instr(OPC_MOVMSK);
|
||||
instr->cat1.src_type = TYPE_U32;
|
||||
instr->cat1.dst_type = TYPE_U32;
|
||||
instr->repeat = $3 - 1;
|
||||
} dst_reg {
|
||||
instr->dsts[0]->wrmask = (1 << $3) - 1;
|
||||
}
|
||||
|
@@ -457,16 +457,6 @@ SOFTWARE.
|
||||
<pattern low="50" high="52">011</pattern> <!-- SRC_TYPE==u32 -->
|
||||
<pattern low="53" high="54">00</pattern>
|
||||
<pattern low="57" high="58">11</pattern> <!-- OPC -->
|
||||
|
||||
<!--
|
||||
TODO in ir3 things are encoded w/ instr->repeat==0 and repeat field is
|
||||
reconstructed from wrmask.. but I'm not sure if that is actually accurate
|
||||
(in terms of how delay slots work).. for now, work around that to match
|
||||
the existing stuff:
|
||||
-->
|
||||
<encode>
|
||||
<map name="REPEAT">util_last_bit(src->dsts[0]->wrmask) - 1</map>
|
||||
</encode>
|
||||
</bitset>
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user