agx: Make p_combine take a dynamic src count
This is needed to build vectors with more than four components.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18525>
This commit is contained in:

committed by
Marge Bot

parent
ef31dceee8
commit
4f85a7be8c
@@ -50,6 +50,10 @@ agx_${opcode}${suffix}(agx_builder *b
|
||||
, agx_index dst${dest}
|
||||
% endfor
|
||||
|
||||
% if op.variable_srcs:
|
||||
, unsigned nr_srcs
|
||||
% endif
|
||||
|
||||
% for src in range(srcs):
|
||||
, agx_index src${src}
|
||||
% endfor
|
||||
@@ -65,7 +69,10 @@ agx_${opcode}${suffix}(agx_builder *b
|
||||
I->dest[${dest}] = dst${dest};
|
||||
% endfor
|
||||
|
||||
% if srcs > 0:
|
||||
% if op.variable_srcs:
|
||||
I->src = ralloc_array(I, agx_index, nr_srcs);
|
||||
I->nr_srcs = nr_srcs;
|
||||
% elif srcs > 0:
|
||||
I->src = ralloc_array(I, agx_index, ${srcs});
|
||||
I->nr_srcs = ${srcs};
|
||||
|
||||
@@ -82,7 +89,7 @@ agx_${opcode}${suffix}(agx_builder *b
|
||||
return I;
|
||||
}
|
||||
|
||||
% if dests == 1:
|
||||
% if dests == 1 and not op.variable_srcs:
|
||||
static inline agx_index
|
||||
agx_${opcode}(agx_builder *b
|
||||
|
||||
|
@@ -120,16 +120,14 @@ agx_emit_extract(agx_builder *b, agx_index vec, unsigned channel)
|
||||
}
|
||||
|
||||
static void
|
||||
agx_cache_combine(agx_builder *b, agx_index dst,
|
||||
agx_index s0, agx_index s1, agx_index s2, agx_index s3)
|
||||
agx_cache_combine(agx_builder *b, agx_index dst, unsigned nr_srcs,
|
||||
agx_index *srcs)
|
||||
{
|
||||
/* Lifetime of a hash table entry has to be at least as long as the table */
|
||||
agx_index *channels = ralloc_array(b->shader, agx_index, 4);
|
||||
agx_index *channels = ralloc_array(b->shader, agx_index, nr_srcs);
|
||||
|
||||
channels[0] = s0;
|
||||
channels[1] = s1;
|
||||
channels[2] = s2;
|
||||
channels[3] = s3;
|
||||
for (unsigned i = 0; i < nr_srcs; ++i)
|
||||
channels[i] = srcs[i];
|
||||
|
||||
_mesa_hash_table_u64_insert(b->shader->allocated_vec, agx_index_to_key(dst),
|
||||
channels);
|
||||
@@ -142,11 +140,34 @@ agx_cache_combine(agx_builder *b, agx_index dst,
|
||||
* To optimize vector extractions, we record the individual channels
|
||||
*/
|
||||
static agx_instr *
|
||||
agx_emit_combine_to(agx_builder *b, agx_index dst,
|
||||
agx_index s0, agx_index s1, agx_index s2, agx_index s3)
|
||||
agx_emit_combine_to(agx_builder *b, agx_index dst, unsigned nr_srcs,
|
||||
agx_index *srcs)
|
||||
{
|
||||
agx_cache_combine(b, dst, s0, s1, s2, s3);
|
||||
return agx_p_combine_to(b, dst, s0, s1, s2, s3);
|
||||
agx_cache_combine(b, dst, 4, srcs);
|
||||
agx_instr *I = agx_p_combine_to(b, dst, nr_srcs);
|
||||
|
||||
agx_foreach_src(I, s)
|
||||
I->src[s] = srcs[s];
|
||||
|
||||
return I;
|
||||
}
|
||||
|
||||
static agx_index
|
||||
agx_vec4(agx_builder *b, agx_index s0, agx_index s1, agx_index s2, agx_index s3)
|
||||
{
|
||||
agx_index dst = agx_temp(b->shader, s0.size);
|
||||
agx_index idx[4] = { s0, s1, s2, s3 };
|
||||
agx_emit_combine_to(b, dst, 4, idx);
|
||||
return dst;
|
||||
}
|
||||
|
||||
static agx_index
|
||||
agx_vec2(agx_builder *b, agx_index s0, agx_index s1)
|
||||
{
|
||||
agx_index dst = agx_temp(b->shader, s0.size);
|
||||
agx_index idx[2] = { s0, s1 };
|
||||
agx_emit_combine_to(b, dst, 2, idx);
|
||||
return dst;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -197,7 +218,7 @@ agx_emit_cached_split(agx_builder *b, agx_index vec, unsigned n)
|
||||
{
|
||||
agx_index dests[4] = { agx_null(), agx_null(), agx_null(), agx_null() };
|
||||
agx_emit_split(b, dests, vec, n);
|
||||
agx_cache_combine(b, vec, dests[0], dests[1], dests[2], dests[3]);
|
||||
agx_cache_combine(b, vec, n, dests);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -654,7 +675,7 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
|
||||
* If only individual components are accessed, this combine will be dead code
|
||||
* eliminated.
|
||||
*/
|
||||
return agx_emit_combine_to(b, dst, dests[0], dests[1], dests[2], dests[3]);
|
||||
return agx_emit_combine_to(b, dst, 4, dests);
|
||||
}
|
||||
|
||||
static agx_index
|
||||
@@ -926,7 +947,10 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr)
|
||||
case nir_op_vec2:
|
||||
case nir_op_vec3:
|
||||
case nir_op_vec4:
|
||||
return agx_emit_combine_to(b, dst, s0, s1, s2, s3);
|
||||
{
|
||||
agx_index idx[] = { s0, s1, s2, s3 };
|
||||
return agx_emit_combine_to(b, dst, 4, idx);
|
||||
}
|
||||
|
||||
case nir_op_vec8:
|
||||
case nir_op_vec16:
|
||||
@@ -1049,7 +1073,7 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
|
||||
agx_mov_to(b, layer32, layer);
|
||||
|
||||
channels[nr - 1] = layer32;
|
||||
coords = agx_p_combine(b, channels[0], channels[1], channels[2], channels[3]);
|
||||
coords = agx_vec4(b, channels[0], channels[1], channels[2], channels[3]);
|
||||
} else {
|
||||
coords = index;
|
||||
}
|
||||
|
@@ -25,11 +25,16 @@ opcodes = {}
|
||||
immediates = {}
|
||||
enums = {}
|
||||
|
||||
VARIABLE = ~0
|
||||
|
||||
class Opcode(object):
|
||||
def __init__(self, name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32):
|
||||
def __init__(self, name, dests, srcs, imms, is_float, can_eliminate,
|
||||
encoding_16, encoding_32):
|
||||
self.name = name
|
||||
self.dests = dests
|
||||
self.srcs = srcs
|
||||
self.dests = dests if dests != VARIABLE else 0
|
||||
self.srcs = srcs if srcs != VARIABLE else 0
|
||||
self.variable_srcs = (srcs == VARIABLE)
|
||||
self.variable_dests = (dests == VARIABLE)
|
||||
self.imms = imms
|
||||
self.is_float = is_float
|
||||
self.can_eliminate = can_eliminate
|
||||
@@ -57,7 +62,8 @@ class Encoding(object):
|
||||
if self.extensible:
|
||||
assert(length_long == length_short + (4 if length_short > 8 else 2))
|
||||
|
||||
def op(name, encoding_32, dests = 1, srcs = 0, imms = [], is_float = False, can_eliminate = True, encoding_16 = None):
|
||||
def op(name, encoding_32, dests = 1, srcs = 0, imms = [], is_float = False,
|
||||
can_eliminate = True, encoding_16 = None):
|
||||
encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None
|
||||
encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None
|
||||
|
||||
@@ -258,7 +264,7 @@ op("or", _, srcs = 2)
|
||||
# Indicates the logical end of the block, before final branches/control flow
|
||||
op("p_logical_end", _, dests = 0, srcs = 0, can_eliminate = False)
|
||||
|
||||
op("p_combine", _, srcs = 4)
|
||||
op("p_combine", _, srcs = VARIABLE)
|
||||
op("p_split", _, srcs = 1, dests = 4)
|
||||
|
||||
# Phis are special-cased in the IR as they (uniquely) can take an unbounded
|
||||
|
@@ -47,16 +47,7 @@ agx_write_registers(agx_instr *I, unsigned d)
|
||||
case AGX_OPCODE_LDCF:
|
||||
return 6;
|
||||
case AGX_OPCODE_P_COMBINE:
|
||||
{
|
||||
unsigned components = 0;
|
||||
|
||||
for (unsigned i = 0; i < 4; ++i) {
|
||||
if (!agx_is_null(I->src[i]))
|
||||
components = i + 1;
|
||||
}
|
||||
|
||||
return components * size;
|
||||
}
|
||||
return I->nr_srcs * size;
|
||||
default:
|
||||
return size;
|
||||
}
|
||||
@@ -325,11 +316,11 @@ agx_ra(agx_context *ctx)
|
||||
unsigned base = agx_index_to_reg(ssa_to_reg, ins->dest[0]);
|
||||
unsigned width = agx_size_align_16(ins->dest[0].size);
|
||||
|
||||
struct agx_copy copies[4];
|
||||
struct agx_copy *copies = alloca(sizeof(copies[0]) * ins->nr_srcs);
|
||||
unsigned n = 0;
|
||||
|
||||
/* Move the sources */
|
||||
for (unsigned i = 0; i < 4; ++i) {
|
||||
agx_foreach_src(ins, i) {
|
||||
if (agx_is_null(ins->src[i])) continue;
|
||||
assert(ins->src[i].size == ins->dest[0].size);
|
||||
|
||||
|
@@ -105,7 +105,13 @@ TEST_F(Optimizer, Copyprop)
|
||||
|
||||
/* Negative case around a p_combine whose first source is the result of an
 * immediate move. Because agx_p_combine_to() takes only a source count, the
 * four sources are assigned on the returned instruction afterwards.
 * NOTE(review): NEGCASE presumably checks that the optimizer leaves the
 * sequence unchanged -- confirm against its definition.
 */
TEST_F(Optimizer, InlineHazards)
|
||||
{
|
||||
NEGCASE(agx_p_combine_to(b, wx, agx_mov_imm(b, AGX_SIZE_32, 0), wy, wz, wz));
|
||||
NEGCASE({
|
||||
agx_instr *I = agx_p_combine_to(b, wx, 4);
|
||||
I->src[0] = agx_mov_imm(b, AGX_SIZE_32, 0);
|
||||
I->src[1] = wy;
|
||||
I->src[2] = wz;
|
||||
I->src[3] = wz;
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(Optimizer, CopypropRespectsAbsNeg)
|
||||
|
Reference in New Issue
Block a user