ir3: Plumb through support for a1.x

This will need to be used in some cases for the upcoming bindless
support, plus ldc.k instructions which push data from a UBO to const
registers.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
Commit authored by Connor Abbott on 2020-03-18 18:06:41 +01:00; committed by Marge Bot.
parent c8b0f90439
commit de7d90ef53
11 changed files with 164 additions and 67 deletions

View File

@@ -1086,7 +1086,14 @@ ir3_instr_set_address(struct ir3_instruction *instr,
debug_assert(instr->block == addr->block);
instr->address = addr;
array_insert(ir, ir->indirects, instr);
debug_assert(reg_num(addr->regs[0]) == REG_A0);
unsigned comp = reg_comp(addr->regs[0]);
if (comp == 0) {
array_insert(ir, ir->a0_users, instr);
} else {
debug_assert(comp == 1);
array_insert(ir, ir->a1_users, instr);
}
}
}

View File

@@ -469,7 +469,10 @@ struct ir3 {
* convenient list of instructions that reference some address
* register simplifies this.
*/
DECLARE_ARRAY(struct ir3_instruction *, indirects);
DECLARE_ARRAY(struct ir3_instruction *, a0_users);
/* same for a1.x: */
DECLARE_ARRAY(struct ir3_instruction *, a1_users);
/* and same for instructions that consume predicate register: */
DECLARE_ARRAY(struct ir3_instruction *, predicates);
@@ -695,10 +698,10 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr)
dst = instr->regs[0];
/* mov's that write to a0.x or p0.x are special: */
/* mov's that write to a0 or p0.x are special: */
if (dst->num == regid(REG_P0, 0))
return false;
if (dst->num == regid(REG_A0, 0))
if (reg_num(dst) == REG_A0)
return false;
if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
@@ -848,11 +851,20 @@ static inline unsigned dest_regs(struct ir3_instruction *instr)
return util_last_bit(instr->regs[0]->wrmask);
}
static inline bool writes_addr(struct ir3_instruction *instr)
static inline bool writes_addr0(struct ir3_instruction *instr)
{
if (instr->regs_count > 0) {
struct ir3_register *dst = instr->regs[0];
return reg_num(dst) == REG_A0;
return dst->num == regid(REG_A0, 0);
}
return false;
}
/* does this instruction write the a1.x address register?
 * (a1.x is component 1 of the REG_A0 file)
 */
static inline bool writes_addr1(struct ir3_instruction *instr)
{
	if (instr->regs_count == 0)
		return false;
	return instr->regs[0]->num == regid(REG_A0, 1);
}

View File

@@ -744,8 +744,8 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
base_lo = create_uniform(b, ubo + (src0->regs[1]->iim_val * ptrsz));
base_hi = create_uniform(b, ubo + (src0->regs[1]->iim_val * ptrsz) + 1);
} else {
base_lo = create_uniform_indirect(b, ubo, ir3_get_addr(ctx, src0, ptrsz));
base_hi = create_uniform_indirect(b, ubo + 1, ir3_get_addr(ctx, src0, ptrsz));
base_lo = create_uniform_indirect(b, ubo, ir3_get_addr0(ctx, src0, ptrsz));
base_hi = create_uniform_indirect(b, ubo + 1, ir3_get_addr0(ctx, src0, ptrsz));
/* NOTE: since relative addressing is used, make sure constlen is
* at least big enough to cover all the UBO addresses, since the
@@ -1362,7 +1362,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
src = ir3_get_src(ctx, &intr->src[0]);
for (int i = 0; i < intr->num_components; i++) {
dst[i] = create_uniform_indirect(b, idx + i,
ir3_get_addr(ctx, src[0], 1));
ir3_get_addr0(ctx, src[0], 1));
}
/* NOTE: if relative addressing is used, we set
* constlen in the compiler (to worst-case value)
@@ -1558,7 +1558,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
src = ir3_get_src(ctx, &intr->src[0]);
struct ir3_instruction *collect =
ir3_create_collect(ctx, ctx->ir->inputs, ctx->ninputs);
struct ir3_instruction *addr = ir3_get_addr(ctx, src[0], 4);
struct ir3_instruction *addr = ir3_get_addr0(ctx, src[0], 4);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i + comp;
dst[i] = create_indirect_load(ctx, ctx->ninputs,
@@ -2424,11 +2424,14 @@ emit_block(struct ir3_context *ctx, nir_block *nblock)
list_addtail(&block->node, &ctx->ir->block_list);
/* re-emit addr register in each block if needed: */
for (int i = 0; i < ARRAY_SIZE(ctx->addr_ht); i++) {
_mesa_hash_table_destroy(ctx->addr_ht[i], NULL);
ctx->addr_ht[i] = NULL;
for (int i = 0; i < ARRAY_SIZE(ctx->addr0_ht); i++) {
_mesa_hash_table_destroy(ctx->addr0_ht[i], NULL);
ctx->addr0_ht[i] = NULL;
}
_mesa_hash_table_u64_destroy(ctx->addr1_ht, NULL);
ctx->addr1_ht = NULL;
nir_foreach_instr (instr, nblock) {
ctx->cur_instr = instr;
emit_instr(ctx, instr);

View File

@@ -184,7 +184,7 @@ ir3_get_src(struct ir3_context *ctx, nir_src *src)
ralloc_array(ctx, struct ir3_instruction *, num_components);
if (src->reg.indirect)
addr = ir3_get_addr(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
reg->num_components);
for (unsigned i = 0; i < num_components; i++) {
@@ -230,7 +230,7 @@ ir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
struct ir3_instruction *addr = NULL;
if (dst->reg.indirect)
addr = ir3_get_addr(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
reg->num_components);
for (unsigned i = 0; i < num_components; i++) {
@@ -378,7 +378,7 @@ ir3_context_error(struct ir3_context *ctx, const char *format, ...)
}
static struct ir3_instruction *
create_addr(struct ir3_block *block, struct ir3_instruction *src, int align)
create_addr0(struct ir3_block *block, struct ir3_instruction *src, int align)
{
struct ir3_instruction *instr, *immed;
@@ -433,29 +433,62 @@ create_addr(struct ir3_block *block, struct ir3_instruction *src, int align)
return instr;
}
/* Create an instruction sequence that writes the immediate `const_val`
 * into the a1.x address register (component 1 of the REG_A0 file).
 */
static struct ir3_instruction *
create_addr1(struct ir3_block *block, unsigned const_val)
{
	/* materialize the constant, then mov it into a1.x: */
	struct ir3_instruction *immed = create_immed(block, const_val);
	struct ir3_instruction *instr = ir3_MOV(block, immed, TYPE_S16);
	/* pin the mov's dst to a1.x instead of an SSA-assigned register: */
	instr->regs[0]->num = regid(REG_A0, 1);
	instr->regs[0]->flags &= ~IR3_REG_SSA;
	/* the mov is 16-bit (TYPE_S16), so both dst and src are half regs: */
	instr->regs[0]->flags |= IR3_REG_HALF;
	instr->regs[1]->flags |= IR3_REG_HALF;
	return instr;
}
/* caches addr values to avoid generating multiple cov/shl/mova
* sequences for each use of a given NIR level src as address
*/
struct ir3_instruction *
ir3_get_addr(struct ir3_context *ctx, struct ir3_instruction *src, int align)
ir3_get_addr0(struct ir3_context *ctx, struct ir3_instruction *src, int align)
{
struct ir3_instruction *addr;
unsigned idx = align - 1;
compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr_ht));
compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr0_ht));
if (!ctx->addr_ht[idx]) {
ctx->addr_ht[idx] = _mesa_hash_table_create(ctx,
if (!ctx->addr0_ht[idx]) {
ctx->addr0_ht[idx] = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
} else {
struct hash_entry *entry;
entry = _mesa_hash_table_search(ctx->addr_ht[idx], src);
entry = _mesa_hash_table_search(ctx->addr0_ht[idx], src);
if (entry)
return entry->data;
}
addr = create_addr(ctx->block, src, align);
_mesa_hash_table_insert(ctx->addr_ht[idx], src, addr);
addr = create_addr0(ctx->block, src, align);
_mesa_hash_table_insert(ctx->addr0_ht[idx], src, addr);
return addr;
}
/* Similar to ir3_get_addr0, but for a1.x. */
struct ir3_instruction *
ir3_get_addr1(struct ir3_context *ctx, unsigned const_val)
{
	struct ir3_instruction *addr;

	if (ctx->addr1_ht) {
		/* cache exists; return any previously created addr for this value: */
		addr = _mesa_hash_table_u64_search(ctx->addr1_ht, const_val);
		if (addr)
			return addr;
	} else {
		/* lazily create the cache on first use: */
		ctx->addr1_ht = _mesa_hash_table_u64_create(ctx);
	}

	addr = create_addr1(ctx->block, const_val);
	_mesa_hash_table_u64_insert(ctx->addr1_ht, const_val, addr);

	return addr;
}

View File

@@ -120,7 +120,12 @@ struct ir3_context {
* src used for an array of vec1 cannot be also used for an
* array of vec4.
*/
struct hash_table *addr_ht[4];
struct hash_table *addr0_ht[4];
/* The same for a1.x. We only support immediate values for a1.x, as this
* is the only use so far.
*/
struct hash_table_u64 *addr1_ht;
/* last dst array, for indirect we need to insert a var-store.
*/
@@ -176,8 +181,10 @@ NORETURN void ir3_context_error(struct ir3_context *ctx, const char *format, ...
if (!(cond)) ir3_context_error((ctx), "failed assert: "#cond"\n"); \
} while (0)
struct ir3_instruction * ir3_get_addr(struct ir3_context *ctx,
struct ir3_instruction * ir3_get_addr0(struct ir3_context *ctx,
struct ir3_instruction *src, int align);
struct ir3_instruction * ir3_get_addr1(struct ir3_context *ctx,
unsigned const_val);
struct ir3_instruction * ir3_get_predicate(struct ir3_context *ctx,
struct ir3_instruction *src);

View File

@@ -82,7 +82,7 @@ ir3_delayslots(struct ir3_instruction *assigner,
if (is_meta(assigner) || is_meta(consumer))
return 0;
if (writes_addr(assigner))
if (writes_addr0(assigner) || writes_addr1(assigner))
return 6;
/* On a6xx, it takes the number of delay slots to get a SFU result

View File

@@ -201,10 +201,16 @@ compute_depth_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so)
/* note that we can end up with unused indirects, but we should
* not end up with unused predicates.
*/
for (i = 0; i < ir->indirects_count; i++) {
struct ir3_instruction *instr = ir->indirects[i];
for (i = 0; i < ir->a0_users_count; i++) {
struct ir3_instruction *instr = ir->a0_users[i];
if (instr && (instr->flags & IR3_INSTR_UNUSED))
ir->indirects[i] = NULL;
ir->a0_users[i] = NULL;
}
for (i = 0; i < ir->a1_users_count; i++) {
struct ir3_instruction *instr = ir->a1_users[i];
if (instr && (instr->flags & IR3_INSTR_UNUSED))
ir->a1_users[i] = NULL;
}
/* cleanup unused inputs: */

View File

@@ -392,7 +392,6 @@ static void
calculate_deps(struct ir3_postsched_deps_state *state,
struct ir3_postsched_node *node)
{
static const struct ir3_register half_reg = { .flags = IR3_REG_HALF };
struct ir3_register *reg;
int b;
@@ -400,12 +399,6 @@ calculate_deps(struct ir3_postsched_deps_state *state,
* in the reverse direction) wrote any of our src registers:
*/
foreach_src_n (reg, i, node->instr) {
/* NOTE: relative access for a src can be either const or gpr: */
if (reg->flags & IR3_REG_RELATIV) {
/* also reads a0.x: */
add_reg_dep(state, node, &half_reg, regid(REG_A0, 0), false);
}
if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
continue;
@@ -428,6 +421,12 @@ calculate_deps(struct ir3_postsched_deps_state *state,
}
}
if (node->instr->address) {
add_reg_dep(state, node, node->instr->address->regs[0],
node->instr->address->regs[0]->num,
false);
}
if (dest_regs(node->instr) == 0)
return;
@@ -441,9 +440,6 @@ calculate_deps(struct ir3_postsched_deps_state *state,
for (unsigned i = 0; i < arr->length; i++) {
add_reg_dep(state, node, reg, arr->reg + i, true);
}
/* also reads a0.x: */
add_reg_dep(state, node, &half_reg, regid(REG_A0, 0), false);
} else {
foreach_bit (b, reg->wrmask) {
add_reg_dep(state, node, reg, reg->num + b, true);

View File

@@ -264,7 +264,7 @@ ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if (instr->regs_count == 0)
continue;
/* couple special cases: */
if (writes_addr(instr) || writes_pred(instr)) {
if (writes_addr0(instr) || writes_addr1(instr) || writes_pred(instr)) {
id->cls = -1;
} else if (instr->regs[0]->flags & IR3_REG_ARRAY) {
id->cls = total_class_count;

View File

@@ -199,7 +199,7 @@ writes_gpr(struct ir3_instruction *instr)
/* is dest a normal temp register: */
struct ir3_register *reg = instr->regs[0];
debug_assert(!(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)));
if ((reg->num == regid(REG_A0, 0)) ||
if ((reg_num(reg) == REG_A0) ||
(reg->num == regid(REG_P0, 0)))
return false;
return true;

View File

@@ -68,7 +68,8 @@ struct ir3_sched_ctx {
struct ir3_block *block; /* the current block */
struct list_head depth_list; /* depth sorted unscheduled instrs */
struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/
struct ir3_instruction *addr; /* current a0.x user, if any */
struct ir3_instruction *addr0; /* current a0.x user, if any */
struct ir3_instruction *addr1; /* current a1.x user, if any */
struct ir3_instruction *pred; /* current p0.x user, if any */
int live_values; /* estimate of current live values */
int half_live_values; /* estimate of current half precision live values */
@@ -225,9 +226,14 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
*/
list_delinit(&instr->node);
if (writes_addr(instr)) {
debug_assert(ctx->addr == NULL);
ctx->addr = instr;
if (writes_addr0(instr)) {
debug_assert(ctx->addr0 == NULL);
ctx->addr0 = instr;
}
if (writes_addr1(instr)) {
debug_assert(ctx->addr1 == NULL);
ctx->addr1 = instr;
}
if (writes_pred(instr)) {
@@ -244,7 +250,7 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
update_live_values(ctx, instr);
if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) {
if (writes_addr0(instr) || writes_addr1(instr) || writes_pred(instr) || is_input(instr)) {
clear_cache(ctx, NULL);
} else {
/* invalidate only the necessary entries.. */
@@ -281,7 +287,7 @@ struct ir3_sched_notes {
/* there is at least one instruction that could be scheduled,
* except for conflicting address/predicate register usage:
*/
bool addr_conflict, pred_conflict;
bool addr0_conflict, addr1_conflict, pred_conflict;
};
/* could an instruction be scheduled if specified ssa src was scheduled? */
@@ -314,11 +320,28 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
* TODO if any instructions use pred register and have other
* src args, we would need to do the same for writes_pred()..
*/
if (writes_addr(instr)) {
if (writes_addr0(instr)) {
struct ir3 *ir = instr->block->shader;
bool ready = false;
for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) {
struct ir3_instruction *indirect = ir->indirects[i];
for (unsigned i = 0; (i < ir->a0_users_count) && !ready; i++) {
struct ir3_instruction *indirect = ir->a0_users[i];
if (!indirect)
continue;
if (indirect->address != instr)
continue;
ready = could_sched(indirect, instr);
}
/* nothing could be scheduled, so keep looking: */
if (!ready)
return false;
}
if (writes_addr1(instr)) {
struct ir3 *ir = instr->block->shader;
bool ready = false;
for (unsigned i = 0; (i < ir->a1_users_count) && !ready; i++) {
struct ir3_instruction *indirect = ir->a1_users[i];
if (!indirect)
continue;
if (indirect->address != instr)
@@ -335,9 +358,15 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
* register is currently in use, we need to defer until it is
* free:
*/
if (writes_addr(instr) && ctx->addr) {
debug_assert(ctx->addr != instr);
notes->addr_conflict = true;
if (writes_addr0(instr) && ctx->addr0) {
debug_assert(ctx->addr0 != instr);
notes->addr0_conflict = true;
return false;
}
if (writes_addr1(instr) && ctx->addr1) {
debug_assert(ctx->addr1 != instr);
notes->addr1_conflict = true;
return false;
}
@@ -585,23 +614,21 @@ split_instr(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr)
return new_instr;
}
/* "spill" the address register by remapping any unscheduled
/* "spill" the address registers by remapping any unscheduled
* instructions which depend on the current address register
* to a clone of the instruction which wrote the address reg.
*/
static struct ir3_instruction *
split_addr(struct ir3_sched_ctx *ctx)
split_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction **addr,
struct ir3_instruction **users, unsigned users_count)
{
struct ir3 *ir;
struct ir3_instruction *new_addr = NULL;
unsigned i;
debug_assert(ctx->addr);
debug_assert(*addr);
ir = ctx->addr->block->shader;
for (i = 0; i < ir->indirects_count; i++) {
struct ir3_instruction *indirect = ir->indirects[i];
for (i = 0; i < users_count; i++) {
struct ir3_instruction *indirect = users[i];
if (!indirect)
continue;
@@ -613,9 +640,9 @@ split_addr(struct ir3_sched_ctx *ctx)
/* remap remaining instructions using current addr
* to new addr:
*/
if (indirect->address == ctx->addr) {
if (indirect->address == *addr) {
if (!new_addr) {
new_addr = split_instr(ctx, ctx->addr);
new_addr = split_instr(ctx, *addr);
/* original addr is scheduled, but new one isn't: */
new_addr->flags &= ~IR3_INSTR_MARK;
}
@@ -625,7 +652,7 @@ split_addr(struct ir3_sched_ctx *ctx)
}
/* all remaining indirects remapped to new addr: */
ctx->addr = NULL;
*addr = NULL;
return new_addr;
}
@@ -682,7 +709,8 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
ctx->block = block;
/* addr/pred writes are per-block: */
ctx->addr = NULL;
ctx->addr0 = NULL;
ctx->addr1 = NULL;
ctx->pred = NULL;
/* move all instructions to the unscheduled list, and
@@ -740,14 +768,19 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
schedule(ctx, instr);
} else {
struct ir3_instruction *new_instr = NULL;
struct ir3 *ir = block->shader;
/* nothing available to schedule.. if we are blocked on
* address/predicate register conflict, then break the
* deadlock by cloning the instruction that wrote that
* reg:
*/
if (notes.addr_conflict) {
new_instr = split_addr(ctx);
if (notes.addr0_conflict) {
new_instr = split_addr(ctx, &ctx->addr0,
ir->a0_users, ir->a0_users_count);
} else if (notes.addr1_conflict) {
new_instr = split_addr(ctx, &ctx->addr1,
ir->a1_users, ir->a1_users_count);
} else if (notes.pred_conflict) {
new_instr = split_pred(ctx);
} else {