agx: Make p_combine take a dynamic src count

For larger vectors.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18525>
This commit is contained in:
Alyssa Rosenzweig
2022-09-09 14:32:01 -04:00
committed by Marge Bot
parent ef31dceee8
commit 4f85a7be8c
5 changed files with 69 additions and 35 deletions

View File

@@ -50,6 +50,10 @@ agx_${opcode}${suffix}(agx_builder *b
, agx_index dst${dest} , agx_index dst${dest}
% endfor % endfor
% if op.variable_srcs:
, unsigned nr_srcs
% endif
% for src in range(srcs): % for src in range(srcs):
, agx_index src${src} , agx_index src${src}
% endfor % endfor
@@ -65,7 +69,10 @@ agx_${opcode}${suffix}(agx_builder *b
I->dest[${dest}] = dst${dest}; I->dest[${dest}] = dst${dest};
% endfor % endfor
% if srcs > 0: % if op.variable_srcs:
I->src = ralloc_array(I, agx_index, nr_srcs);
I->nr_srcs = nr_srcs;
% elif srcs > 0:
I->src = ralloc_array(I, agx_index, ${srcs}); I->src = ralloc_array(I, agx_index, ${srcs});
I->nr_srcs = ${srcs}; I->nr_srcs = ${srcs};
@@ -82,7 +89,7 @@ agx_${opcode}${suffix}(agx_builder *b
return I; return I;
} }
% if dests == 1: % if dests == 1 and not op.variable_srcs:
static inline agx_index static inline agx_index
agx_${opcode}(agx_builder *b agx_${opcode}(agx_builder *b

View File

@@ -120,16 +120,14 @@ agx_emit_extract(agx_builder *b, agx_index vec, unsigned channel)
} }
static void static void
agx_cache_combine(agx_builder *b, agx_index dst, agx_cache_combine(agx_builder *b, agx_index dst, unsigned nr_srcs,
agx_index s0, agx_index s1, agx_index s2, agx_index s3) agx_index *srcs)
{ {
/* Lifetime of a hash table entry has to be at least as long as the table */ /* Lifetime of a hash table entry has to be at least as long as the table */
agx_index *channels = ralloc_array(b->shader, agx_index, 4); agx_index *channels = ralloc_array(b->shader, agx_index, nr_srcs);
channels[0] = s0; for (unsigned i = 0; i < nr_srcs; ++i)
channels[1] = s1; channels[i] = srcs[i];
channels[2] = s2;
channels[3] = s3;
_mesa_hash_table_u64_insert(b->shader->allocated_vec, agx_index_to_key(dst), _mesa_hash_table_u64_insert(b->shader->allocated_vec, agx_index_to_key(dst),
channels); channels);
@@ -142,11 +140,34 @@ agx_cache_combine(agx_builder *b, agx_index dst,
* To optimize vector extractions, we record the individual channels * To optimize vector extractions, we record the individual channels
*/ */
static agx_instr * static agx_instr *
agx_emit_combine_to(agx_builder *b, agx_index dst, agx_emit_combine_to(agx_builder *b, agx_index dst, unsigned nr_srcs,
agx_index s0, agx_index s1, agx_index s2, agx_index s3) agx_index *srcs)
{ {
agx_cache_combine(b, dst, s0, s1, s2, s3); agx_cache_combine(b, dst, 4, srcs);
return agx_p_combine_to(b, dst, s0, s1, s2, s3); agx_instr *I = agx_p_combine_to(b, dst, nr_srcs);
agx_foreach_src(I, s)
I->src[s] = srcs[s];
return I;
}
/* Combine four scalar sources into a freshly allocated vector temporary and
 * return its index. The temporary takes its size from s0.
 */
static agx_index
agx_vec4(agx_builder *b, agx_index s0, agx_index s1, agx_index s2, agx_index s3)
{
   agx_index channels[4] = { s0, s1, s2, s3 };
   agx_index vec = agx_temp(b->shader, s0.size);

   agx_emit_combine_to(b, vec, 4, channels);
   return vec;
}
/* Combine two scalar sources into a freshly allocated vector temporary and
 * return its index. The temporary takes its size from s0.
 */
static agx_index
agx_vec2(agx_builder *b, agx_index s0, agx_index s1)
{
   agx_index channels[2] = { s0, s1 };
   agx_index vec = agx_temp(b->shader, s0.size);

   agx_emit_combine_to(b, vec, 2, channels);
   return vec;
}
static void static void
@@ -197,7 +218,7 @@ agx_emit_cached_split(agx_builder *b, agx_index vec, unsigned n)
{ {
agx_index dests[4] = { agx_null(), agx_null(), agx_null(), agx_null() }; agx_index dests[4] = { agx_null(), agx_null(), agx_null(), agx_null() };
agx_emit_split(b, dests, vec, n); agx_emit_split(b, dests, vec, n);
agx_cache_combine(b, vec, dests[0], dests[1], dests[2], dests[3]); agx_cache_combine(b, vec, n, dests);
} }
static void static void
@@ -654,7 +675,7 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
* If only individual components are accessed, this combine will be dead code * If only individual components are accessed, this combine will be dead code
* eliminated. * eliminated.
*/ */
return agx_emit_combine_to(b, dst, dests[0], dests[1], dests[2], dests[3]); return agx_emit_combine_to(b, dst, 4, dests);
} }
static agx_index static agx_index
@@ -926,7 +947,10 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr)
case nir_op_vec2: case nir_op_vec2:
case nir_op_vec3: case nir_op_vec3:
case nir_op_vec4: case nir_op_vec4:
return agx_emit_combine_to(b, dst, s0, s1, s2, s3); {
agx_index idx[] = { s0, s1, s2, s3 };
return agx_emit_combine_to(b, dst, 4, idx);
}
case nir_op_vec8: case nir_op_vec8:
case nir_op_vec16: case nir_op_vec16:
@@ -1049,7 +1073,7 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
agx_mov_to(b, layer32, layer); agx_mov_to(b, layer32, layer);
channels[nr - 1] = layer32; channels[nr - 1] = layer32;
coords = agx_p_combine(b, channels[0], channels[1], channels[2], channels[3]); coords = agx_vec4(b, channels[0], channels[1], channels[2], channels[3]);
} else { } else {
coords = index; coords = index;
} }

View File

@@ -25,11 +25,16 @@ opcodes = {}
immediates = {} immediates = {}
enums = {} enums = {}
VARIABLE = ~0
class Opcode(object): class Opcode(object):
def __init__(self, name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32): def __init__(self, name, dests, srcs, imms, is_float, can_eliminate,
encoding_16, encoding_32):
self.name = name self.name = name
self.dests = dests self.dests = dests if dests != VARIABLE else 0
self.srcs = srcs self.srcs = srcs if srcs != VARIABLE else 0
self.variable_srcs = (srcs == VARIABLE)
self.variable_dests = (dests == VARIABLE)
self.imms = imms self.imms = imms
self.is_float = is_float self.is_float = is_float
self.can_eliminate = can_eliminate self.can_eliminate = can_eliminate
@@ -57,7 +62,8 @@ class Encoding(object):
if self.extensible: if self.extensible:
assert(length_long == length_short + (4 if length_short > 8 else 2)) assert(length_long == length_short + (4 if length_short > 8 else 2))
def op(name, encoding_32, dests = 1, srcs = 0, imms = [], is_float = False, can_eliminate = True, encoding_16 = None): def op(name, encoding_32, dests = 1, srcs = 0, imms = [], is_float = False,
can_eliminate = True, encoding_16 = None):
encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None
encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None
@@ -258,7 +264,7 @@ op("or", _, srcs = 2)
# Indicates the logical end of the block, before final branches/control flow # Indicates the logical end of the block, before final branches/control flow
op("p_logical_end", _, dests = 0, srcs = 0, can_eliminate = False) op("p_logical_end", _, dests = 0, srcs = 0, can_eliminate = False)
op("p_combine", _, srcs = 4) op("p_combine", _, srcs = VARIABLE)
op("p_split", _, srcs = 1, dests = 4) op("p_split", _, srcs = 1, dests = 4)
# Phis are special-cased in the IR as they (uniquely) can take an unbounded # Phis are special-cased in the IR as they (uniquely) can take an unbounded

View File

@@ -47,16 +47,7 @@ agx_write_registers(agx_instr *I, unsigned d)
case AGX_OPCODE_LDCF: case AGX_OPCODE_LDCF:
return 6; return 6;
case AGX_OPCODE_P_COMBINE: case AGX_OPCODE_P_COMBINE:
{ return I->nr_srcs * size;
unsigned components = 0;
for (unsigned i = 0; i < 4; ++i) {
if (!agx_is_null(I->src[i]))
components = i + 1;
}
return components * size;
}
default: default:
return size; return size;
} }
@@ -325,11 +316,11 @@ agx_ra(agx_context *ctx)
unsigned base = agx_index_to_reg(ssa_to_reg, ins->dest[0]); unsigned base = agx_index_to_reg(ssa_to_reg, ins->dest[0]);
unsigned width = agx_size_align_16(ins->dest[0].size); unsigned width = agx_size_align_16(ins->dest[0].size);
struct agx_copy copies[4]; struct agx_copy *copies = alloca(sizeof(copies[0]) * ins->nr_srcs);
unsigned n = 0; unsigned n = 0;
/* Move the sources */ /* Move the sources */
for (unsigned i = 0; i < 4; ++i) { agx_foreach_src(ins, i) {
if (agx_is_null(ins->src[i])) continue; if (agx_is_null(ins->src[i])) continue;
assert(ins->src[i].size == ins->dest[0].size); assert(ins->src[i].size == ins->dest[0].size);

View File

@@ -105,7 +105,13 @@ TEST_F(Optimizer, Copyprop)
TEST_F(Optimizer, InlineHazards) TEST_F(Optimizer, InlineHazards)
{ {
NEGCASE(agx_p_combine_to(b, wx, agx_mov_imm(b, AGX_SIZE_32, 0), wy, wz, wz)); NEGCASE({
agx_instr *I = agx_p_combine_to(b, wx, 4);
I->src[0] = agx_mov_imm(b, AGX_SIZE_32, 0);
I->src[1] = wy;
I->src[2] = wz;
I->src[3] = wz;
});
} }
TEST_F(Optimizer, CopypropRespectsAbsNeg) TEST_F(Optimizer, CopypropRespectsAbsNeg)