agx: Make p_combine take a dynamic src count

For larger vectors.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18525>
This commit is contained in:
Alyssa Rosenzweig
2022-09-09 14:32:01 -04:00
committed by Marge Bot
parent ef31dceee8
commit 4f85a7be8c
5 changed files with 69 additions and 35 deletions

View File

@@ -50,6 +50,10 @@ agx_${opcode}${suffix}(agx_builder *b
, agx_index dst${dest}
% endfor
% if op.variable_srcs:
, unsigned nr_srcs
% endif
% for src in range(srcs):
, agx_index src${src}
% endfor
@@ -65,7 +69,10 @@ agx_${opcode}${suffix}(agx_builder *b
I->dest[${dest}] = dst${dest};
% endfor
% if srcs > 0:
% if op.variable_srcs:
I->src = ralloc_array(I, agx_index, nr_srcs);
I->nr_srcs = nr_srcs;
% elif srcs > 0:
I->src = ralloc_array(I, agx_index, ${srcs});
I->nr_srcs = ${srcs};
@@ -82,7 +89,7 @@ agx_${opcode}${suffix}(agx_builder *b
return I;
}
% if dests == 1:
% if dests == 1 and not op.variable_srcs:
static inline agx_index
agx_${opcode}(agx_builder *b

View File

@@ -120,16 +120,14 @@ agx_emit_extract(agx_builder *b, agx_index vec, unsigned channel)
}
static void
agx_cache_combine(agx_builder *b, agx_index dst,
agx_index s0, agx_index s1, agx_index s2, agx_index s3)
agx_cache_combine(agx_builder *b, agx_index dst, unsigned nr_srcs,
agx_index *srcs)
{
/* Lifetime of a hash table entry has to be at least as long as the table */
agx_index *channels = ralloc_array(b->shader, agx_index, 4);
agx_index *channels = ralloc_array(b->shader, agx_index, nr_srcs);
channels[0] = s0;
channels[1] = s1;
channels[2] = s2;
channels[3] = s3;
for (unsigned i = 0; i < nr_srcs; ++i)
channels[i] = srcs[i];
_mesa_hash_table_u64_insert(b->shader->allocated_vec, agx_index_to_key(dst),
channels);
@@ -142,11 +140,34 @@ agx_cache_combine(agx_builder *b, agx_index dst,
* To optimize vector extractions, we record the individual channels
*/
static agx_instr *
agx_emit_combine_to(agx_builder *b, agx_index dst,
agx_index s0, agx_index s1, agx_index s2, agx_index s3)
agx_emit_combine_to(agx_builder *b, agx_index dst, unsigned nr_srcs,
agx_index *srcs)
{
agx_cache_combine(b, dst, s0, s1, s2, s3);
return agx_p_combine_to(b, dst, s0, s1, s2, s3);
agx_cache_combine(b, dst, 4, srcs);
agx_instr *I = agx_p_combine_to(b, dst, nr_srcs);
agx_foreach_src(I, s)
I->src[s] = srcs[s];
return I;
}
static agx_index
agx_vec4(agx_builder *b, agx_index s0, agx_index s1, agx_index s2, agx_index s3)
{
agx_index dst = agx_temp(b->shader, s0.size);
agx_index idx[4] = { s0, s1, s2, s3 };
agx_emit_combine_to(b, dst, 4, idx);
return dst;
}
static agx_index
agx_vec2(agx_builder *b, agx_index s0, agx_index s1)
{
agx_index dst = agx_temp(b->shader, s0.size);
agx_index idx[2] = { s0, s1 };
agx_emit_combine_to(b, dst, 2, idx);
return dst;
}
static void
@@ -197,7 +218,7 @@ agx_emit_cached_split(agx_builder *b, agx_index vec, unsigned n)
{
agx_index dests[4] = { agx_null(), agx_null(), agx_null(), agx_null() };
agx_emit_split(b, dests, vec, n);
agx_cache_combine(b, vec, dests[0], dests[1], dests[2], dests[3]);
agx_cache_combine(b, vec, n, dests);
}
static void
@@ -654,7 +675,7 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
* If only individual components are accessed, this combine will be dead code
* eliminated.
*/
return agx_emit_combine_to(b, dst, dests[0], dests[1], dests[2], dests[3]);
return agx_emit_combine_to(b, dst, 4, dests);
}
static agx_index
@@ -926,7 +947,10 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr)
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4:
return agx_emit_combine_to(b, dst, s0, s1, s2, s3);
{
agx_index idx[] = { s0, s1, s2, s3 };
return agx_emit_combine_to(b, dst, 4, idx);
}
case nir_op_vec8:
case nir_op_vec16:
@@ -1049,7 +1073,7 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
agx_mov_to(b, layer32, layer);
channels[nr - 1] = layer32;
coords = agx_p_combine(b, channels[0], channels[1], channels[2], channels[3]);
coords = agx_vec4(b, channels[0], channels[1], channels[2], channels[3]);
} else {
coords = index;
}

View File

@@ -25,11 +25,16 @@ opcodes = {}
immediates = {}
enums = {}
VARIABLE = ~0
class Opcode(object):
def __init__(self, name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32):
def __init__(self, name, dests, srcs, imms, is_float, can_eliminate,
encoding_16, encoding_32):
self.name = name
self.dests = dests
self.srcs = srcs
self.dests = dests if dests != VARIABLE else 0
self.srcs = srcs if srcs != VARIABLE else 0
self.variable_srcs = (srcs == VARIABLE)
self.variable_dests = (dests == VARIABLE)
self.imms = imms
self.is_float = is_float
self.can_eliminate = can_eliminate
@@ -57,7 +62,8 @@ class Encoding(object):
if self.extensible:
assert(length_long == length_short + (4 if length_short > 8 else 2))
def op(name, encoding_32, dests = 1, srcs = 0, imms = [], is_float = False, can_eliminate = True, encoding_16 = None):
def op(name, encoding_32, dests = 1, srcs = 0, imms = [], is_float = False,
can_eliminate = True, encoding_16 = None):
encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None
encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None
@@ -258,7 +264,7 @@ op("or", _, srcs = 2)
# Indicates the logical end of the block, before final branches/control flow
op("p_logical_end", _, dests = 0, srcs = 0, can_eliminate = False)
op("p_combine", _, srcs = 4)
op("p_combine", _, srcs = VARIABLE)
op("p_split", _, srcs = 1, dests = 4)
# Phis are special-cased in the IR as they (uniquely) can take an unbounded

View File

@@ -47,16 +47,7 @@ agx_write_registers(agx_instr *I, unsigned d)
case AGX_OPCODE_LDCF:
return 6;
case AGX_OPCODE_P_COMBINE:
{
unsigned components = 0;
for (unsigned i = 0; i < 4; ++i) {
if (!agx_is_null(I->src[i]))
components = i + 1;
}
return components * size;
}
return I->nr_srcs * size;
default:
return size;
}
@@ -325,11 +316,11 @@ agx_ra(agx_context *ctx)
unsigned base = agx_index_to_reg(ssa_to_reg, ins->dest[0]);
unsigned width = agx_size_align_16(ins->dest[0].size);
struct agx_copy copies[4];
struct agx_copy *copies = alloca(sizeof(copies[0]) * ins->nr_srcs);
unsigned n = 0;
/* Move the sources */
for (unsigned i = 0; i < 4; ++i) {
agx_foreach_src(ins, i) {
if (agx_is_null(ins->src[i])) continue;
assert(ins->src[i].size == ins->dest[0].size);

View File

@@ -105,7 +105,13 @@ TEST_F(Optimizer, Copyprop)
TEST_F(Optimizer, InlineHazards)
{
NEGCASE(agx_p_combine_to(b, wx, agx_mov_imm(b, AGX_SIZE_32, 0), wy, wz, wz));
NEGCASE({
agx_instr *I = agx_p_combine_to(b, wx, 4);
I->src[0] = agx_mov_imm(b, AGX_SIZE_32, 0);
I->src[1] = wy;
I->src[2] = wz;
I->src[3] = wz;
});
}
TEST_F(Optimizer, CopypropRespectsAbsNeg)