ir3: Plumb through support for a1.x
This will need to be used in some cases for the upcoming bindless support, plus ldc.k instructions which push data from a UBO to const registers.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
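For orientation, a hedged sketch of how a consumer of this plumbing might drive a1.x; `consumer_instr` and `offset` are hypothetical stand-ins, while ir3_get_addr1() and ir3_instr_set_address() are the helpers introduced/updated below:

    /* Hypothetical caller: route an immediate offset through a1.x.
     * ir3_get_addr1() returns a (cached) mov of the immediate into a1.x;
     * ir3_instr_set_address() records the dependency so scheduling and RA
     * treat consumer_instr as an a1.x user.
     */
    struct ir3_instruction *addr = ir3_get_addr1(ctx, offset);
    ir3_instr_set_address(consumer_instr, addr);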
@@ -1086,7 +1086,14 @@ ir3_instr_set_address(struct ir3_instruction *instr,
 		debug_assert(instr->block == addr->block);
 
 		instr->address = addr;
-		array_insert(ir, ir->indirects, instr);
+
+		debug_assert(reg_num(addr->regs[0]) == REG_A0);
+		unsigned comp = reg_comp(addr->regs[0]);
+		if (comp == 0) {
+			array_insert(ir, ir->a0_users, instr);
+		} else {
+			debug_assert(comp == 1);
+			array_insert(ir, ir->a1_users, instr);
+		}
 	}
 }
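The reg_num()/reg_comp() dispatch above works because ir3 packs a register id as (num << 2) | comp, so a0.x and a1.x are components 0 and 1 of the same address register. For reference, the helpers condensed from ir3.h (not part of this change):

    /* regid(REG_A0, 0) is a0.x and regid(REG_A0, 1) is a1.x: same
     * reg_num(), different component.
     */
    static inline unsigned reg_num(struct ir3_register *reg)  { return reg->num >> 2; }
    static inline unsigned reg_comp(struct ir3_register *reg) { return reg->num & 0x3; }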
@@ -469,7 +469,10 @@ struct ir3 {
 	 * convenient list of instructions that reference some address
 	 * register simplifies this.
 	 */
-	DECLARE_ARRAY(struct ir3_instruction *, indirects);
+	DECLARE_ARRAY(struct ir3_instruction *, a0_users);
+
+	/* same for a1.x: */
+	DECLARE_ARRAY(struct ir3_instruction *, a1_users);
 
 	/* and same for instructions that consume predicate register: */
 	DECLARE_ARRAY(struct ir3_instruction *, predicates);
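DECLARE_ARRAY() pairs with the array_insert() calls seen above; paraphrased from the helper in ir3.h (for reference, not part of this change):

    /* Declares name, plus name##_count and name##_sz bookkeeping fields;
     * array_insert() grows the backing store on demand and appends.
     */
    #define DECLARE_ARRAY(type, name) \
            unsigned name ## _count, name ## _sz; \
            type * name;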
@@ -695,10 +698,10 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr)
 	dst = instr->regs[0];
 
-	/* mov's that write to a0.x or p0.x are special: */
+	/* mov's that write to a0 or p0.x are special: */
 	if (dst->num == regid(REG_P0, 0))
 		return false;
-	if (dst->num == regid(REG_A0, 0))
+	if (reg_num(dst) == REG_A0)
 		return false;
 
 	if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
@@ -848,11 +851,20 @@ static inline unsigned dest_regs(struct ir3_instruction *instr)
 	return util_last_bit(instr->regs[0]->wrmask);
 }
 
-static inline bool writes_addr(struct ir3_instruction *instr)
+static inline bool writes_addr0(struct ir3_instruction *instr)
 {
 	if (instr->regs_count > 0) {
 		struct ir3_register *dst = instr->regs[0];
-		return reg_num(dst) == REG_A0;
+		return dst->num == regid(REG_A0, 0);
 	}
 	return false;
 }
 
+static inline bool writes_addr1(struct ir3_instruction *instr)
+{
+	if (instr->regs_count > 0) {
+		struct ir3_register *dst = instr->regs[0];
+		return dst->num == regid(REG_A0, 1);
+	}
+	return false;
+}
+
@@ -744,8 +744,8 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 		base_lo = create_uniform(b, ubo + (src0->regs[1]->iim_val * ptrsz));
 		base_hi = create_uniform(b, ubo + (src0->regs[1]->iim_val * ptrsz) + 1);
 	} else {
-		base_lo = create_uniform_indirect(b, ubo, ir3_get_addr(ctx, src0, ptrsz));
-		base_hi = create_uniform_indirect(b, ubo + 1, ir3_get_addr(ctx, src0, ptrsz));
+		base_lo = create_uniform_indirect(b, ubo, ir3_get_addr0(ctx, src0, ptrsz));
+		base_hi = create_uniform_indirect(b, ubo + 1, ir3_get_addr0(ctx, src0, ptrsz));
 
 		/* NOTE: since relative addressing is used, make sure constlen is
 		 * at least big enough to cover all the UBO addresses, since the
@@ -1362,7 +1362,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 			src = ir3_get_src(ctx, &intr->src[0]);
 			for (int i = 0; i < intr->num_components; i++) {
 				dst[i] = create_uniform_indirect(b, idx + i,
-						ir3_get_addr(ctx, src[0], 1));
+						ir3_get_addr0(ctx, src[0], 1));
 			}
 			/* NOTE: if relative addressing is used, we set
 			 * constlen in the compiler (to worst-case value)
@@ -1558,7 +1558,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 		src = ir3_get_src(ctx, &intr->src[0]);
 		struct ir3_instruction *collect =
 				ir3_create_collect(ctx, ctx->ir->inputs, ctx->ninputs);
-		struct ir3_instruction *addr = ir3_get_addr(ctx, src[0], 4);
+		struct ir3_instruction *addr = ir3_get_addr0(ctx, src[0], 4);
 		for (int i = 0; i < intr->num_components; i++) {
 			unsigned n = idx * 4 + i + comp;
 			dst[i] = create_indirect_load(ctx, ctx->ninputs,
@@ -2424,11 +2424,14 @@ emit_block(struct ir3_context *ctx, nir_block *nblock)
 	list_addtail(&block->node, &ctx->ir->block_list);
 
 	/* re-emit addr register in each block if needed: */
-	for (int i = 0; i < ARRAY_SIZE(ctx->addr_ht); i++) {
-		_mesa_hash_table_destroy(ctx->addr_ht[i], NULL);
-		ctx->addr_ht[i] = NULL;
+	for (int i = 0; i < ARRAY_SIZE(ctx->addr0_ht); i++) {
+		_mesa_hash_table_destroy(ctx->addr0_ht[i], NULL);
+		ctx->addr0_ht[i] = NULL;
 	}
 
+	_mesa_hash_table_u64_destroy(ctx->addr1_ht, NULL);
+	ctx->addr1_ht = NULL;
+
 	nir_foreach_instr (instr, nblock) {
 		ctx->cur_instr = instr;
 		emit_instr(ctx, instr);
@@ -184,7 +184,7 @@ ir3_get_src(struct ir3_context *ctx, nir_src *src)
 				ralloc_array(ctx, struct ir3_instruction *, num_components);
 
 		if (src->reg.indirect)
-			addr = ir3_get_addr(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
+			addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
 					reg->num_components);
 
 		for (unsigned i = 0; i < num_components; i++) {
@@ -230,7 +230,7 @@ ir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
 		struct ir3_instruction *addr = NULL;
 
 		if (dst->reg.indirect)
-			addr = ir3_get_addr(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
+			addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
 					reg->num_components);
 
 		for (unsigned i = 0; i < num_components; i++) {
@@ -378,7 +378,7 @@ ir3_context_error(struct ir3_context *ctx, const char *format, ...)
 }
 
 static struct ir3_instruction *
-create_addr(struct ir3_block *block, struct ir3_instruction *src, int align)
+create_addr0(struct ir3_block *block, struct ir3_instruction *src, int align)
 {
 	struct ir3_instruction *instr, *immed;
@@ -433,29 +433,62 @@ create_addr(struct ir3_block *block, struct ir3_instruction *src, int align)
 	return instr;
 }
 
+static struct ir3_instruction *
+create_addr1(struct ir3_block *block, unsigned const_val)
+{
+	struct ir3_instruction *immed = create_immed(block, const_val);
+	struct ir3_instruction *instr = ir3_MOV(block, immed, TYPE_S16);
+	instr->regs[0]->num = regid(REG_A0, 1);
+	instr->regs[0]->flags &= ~IR3_REG_SSA;
+	instr->regs[0]->flags |= IR3_REG_HALF;
+	instr->regs[1]->flags |= IR3_REG_HALF;
+	return instr;
+}
+
 /* caches addr values to avoid generating multiple cov/shl/mova
  * sequences for each use of a given NIR level src as address
  */
 struct ir3_instruction *
-ir3_get_addr(struct ir3_context *ctx, struct ir3_instruction *src, int align)
+ir3_get_addr0(struct ir3_context *ctx, struct ir3_instruction *src, int align)
 {
 	struct ir3_instruction *addr;
 	unsigned idx = align - 1;
 
-	compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr_ht));
+	compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr0_ht));
 
-	if (!ctx->addr_ht[idx]) {
-		ctx->addr_ht[idx] = _mesa_hash_table_create(ctx,
+	if (!ctx->addr0_ht[idx]) {
+		ctx->addr0_ht[idx] = _mesa_hash_table_create(ctx,
 				_mesa_hash_pointer, _mesa_key_pointer_equal);
 	} else {
 		struct hash_entry *entry;
-		entry = _mesa_hash_table_search(ctx->addr_ht[idx], src);
+		entry = _mesa_hash_table_search(ctx->addr0_ht[idx], src);
 		if (entry)
 			return entry->data;
 	}
 
-	addr = create_addr(ctx->block, src, align);
-	_mesa_hash_table_insert(ctx->addr_ht[idx], src, addr);
+	addr = create_addr0(ctx->block, src, align);
+	_mesa_hash_table_insert(ctx->addr0_ht[idx], src, addr);
 
 	return addr;
 }
 
+/* Similar to ir3_get_addr0, but for a1.x. */
+struct ir3_instruction *
+ir3_get_addr1(struct ir3_context *ctx, unsigned const_val)
+{
+	struct ir3_instruction *addr;
+
+	if (!ctx->addr1_ht) {
+		ctx->addr1_ht = _mesa_hash_table_u64_create(ctx);
+	} else {
+		addr = _mesa_hash_table_u64_search(ctx->addr1_ht, const_val);
+		if (addr)
+			return addr;
+	}
+
+	addr = create_addr1(ctx->block, const_val);
+	_mesa_hash_table_u64_insert(ctx->addr1_ht, const_val, addr);
+
+	return addr;
+}
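A hedged usage sketch contrasting the two caches above (arguments illustrative):

    /* a0.x values are keyed by the NIR-level src *and* the align (one hash
     * table per align in 1..4, addr0_ht[align - 1]), since the generated
     * shl/mova sequence differs per align.
     */
    struct ir3_instruction *a0 = ir3_get_addr0(ctx, src, 4);

    /* a1.x only supports immediates so far, keyed by value, so repeated
     * requests for the same constant share one mov to a1.x.
     */
    struct ir3_instruction *a1 = ir3_get_addr1(ctx, 16);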
@@ -120,7 +120,12 @@ struct ir3_context {
 	 * src used for an array of vec1 cannot be also used for an
 	 * array of vec4.
 	 */
-	struct hash_table *addr_ht[4];
+	struct hash_table *addr0_ht[4];
+
+	/* The same for a1.x. We only support immediate values for a1.x, as this
+	 * is the only use so far.
+	 */
+	struct hash_table_u64 *addr1_ht;
 
 	/* last dst array, for indirect we need to insert a var-store.
 	 */
@@ -176,8 +181,10 @@ NORETURN void ir3_context_error(struct ir3_context *ctx, const char *format, ...
 		if (!(cond)) ir3_context_error((ctx), "failed assert: "#cond"\n"); \
 	} while (0)
 
-struct ir3_instruction * ir3_get_addr(struct ir3_context *ctx,
+struct ir3_instruction * ir3_get_addr0(struct ir3_context *ctx,
 		struct ir3_instruction *src, int align);
+struct ir3_instruction * ir3_get_addr1(struct ir3_context *ctx,
+		unsigned const_val);
 struct ir3_instruction * ir3_get_predicate(struct ir3_context *ctx,
 		struct ir3_instruction *src);
@@ -82,7 +82,7 @@ ir3_delayslots(struct ir3_instruction *assigner,
 	if (is_meta(assigner) || is_meta(consumer))
 		return 0;
 
-	if (writes_addr(assigner))
+	if (writes_addr0(assigner) || writes_addr1(assigner))
 		return 6;
 
 	/* On a6xx, it takes the number of delay slots to get a SFU result
@@ -201,10 +201,16 @@ compute_depth_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so)
 	/* note that we can end up with unused indirects, but we should
 	 * not end up with unused predicates.
 	 */
-	for (i = 0; i < ir->indirects_count; i++) {
-		struct ir3_instruction *instr = ir->indirects[i];
+	for (i = 0; i < ir->a0_users_count; i++) {
+		struct ir3_instruction *instr = ir->a0_users[i];
 		if (instr && (instr->flags & IR3_INSTR_UNUSED))
-			ir->indirects[i] = NULL;
+			ir->a0_users[i] = NULL;
 	}
 
+	for (i = 0; i < ir->a1_users_count; i++) {
+		struct ir3_instruction *instr = ir->a1_users[i];
+		if (instr && (instr->flags & IR3_INSTR_UNUSED))
+			ir->a1_users[i] = NULL;
+	}
+
 	/* cleanup unused inputs: */
@@ -392,7 +392,6 @@ static void
 calculate_deps(struct ir3_postsched_deps_state *state,
 		struct ir3_postsched_node *node)
 {
-	static const struct ir3_register half_reg = { .flags = IR3_REG_HALF };
 	struct ir3_register *reg;
 	int b;
 
@@ -400,12 +399,6 @@ calculate_deps(struct ir3_postsched_deps_state *state,
 	 * in the reverse direction) wrote any of our src registers:
 	 */
 	foreach_src_n (reg, i, node->instr) {
-		/* NOTE: relative access for a src can be either const or gpr: */
-		if (reg->flags & IR3_REG_RELATIV) {
-			/* also reads a0.x: */
-			add_reg_dep(state, node, &half_reg, regid(REG_A0, 0), false);
-		}
-
 		if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
 			continue;
 
@@ -428,6 +421,12 @@ calculate_deps(struct ir3_postsched_deps_state *state,
 		}
 	}
 
+	if (node->instr->address) {
+		add_reg_dep(state, node, node->instr->address->regs[0],
+				node->instr->address->regs[0]->num,
+				false);
+	}
+
 	if (dest_regs(node->instr) == 0)
 		return;
 
@@ -441,9 +440,6 @@ calculate_deps(struct ir3_postsched_deps_state *state,
 		for (unsigned i = 0; i < arr->length; i++) {
 			add_reg_dep(state, node, reg, arr->reg + i, true);
 		}
-
-		/* also reads a0.x: */
-		add_reg_dep(state, node, &half_reg, regid(REG_A0, 0), false);
 	} else {
 		foreach_bit (b, reg->wrmask) {
 			add_reg_dep(state, node, reg, reg->num + b, true);
@@ -264,7 +264,7 @@ ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 		if (instr->regs_count == 0)
 			continue;
 		/* couple special cases: */
-		if (writes_addr(instr) || writes_pred(instr)) {
+		if (writes_addr0(instr) || writes_addr1(instr) || writes_pred(instr)) {
 			id->cls = -1;
 		} else if (instr->regs[0]->flags & IR3_REG_ARRAY) {
 			id->cls = total_class_count;
@@ -199,7 +199,7 @@ writes_gpr(struct ir3_instruction *instr)
 	/* is dest a normal temp register: */
 	struct ir3_register *reg = instr->regs[0];
 	debug_assert(!(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)));
-	if ((reg->num == regid(REG_A0, 0)) ||
+	if ((reg_num(reg) == REG_A0) ||
 			(reg->num == regid(REG_P0, 0)))
 		return false;
 	return true;
@@ -68,7 +68,8 @@ struct ir3_sched_ctx {
 	struct ir3_block *block;           /* the current block */
 	struct list_head depth_list;       /* depth sorted unscheduled instrs */
 	struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/
-	struct ir3_instruction *addr;      /* current a0.x user, if any */
+	struct ir3_instruction *addr0;     /* current a0.x user, if any */
+	struct ir3_instruction *addr1;     /* current a1.x user, if any */
 	struct ir3_instruction *pred;      /* current p0.x user, if any */
 	int live_values;                   /* estimate of current live values */
 	int half_live_values;              /* estimate of current half precision live values */
@@ -225,9 +226,14 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 	 */
 	list_delinit(&instr->node);
 
-	if (writes_addr(instr)) {
-		debug_assert(ctx->addr == NULL);
-		ctx->addr = instr;
+	if (writes_addr0(instr)) {
+		debug_assert(ctx->addr0 == NULL);
+		ctx->addr0 = instr;
 	}
 
+	if (writes_addr1(instr)) {
+		debug_assert(ctx->addr1 == NULL);
+		ctx->addr1 = instr;
+	}
+
 	if (writes_pred(instr)) {
@@ -244,7 +250,7 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 
 	update_live_values(ctx, instr);
 
-	if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) {
+	if (writes_addr0(instr) || writes_addr1(instr) || writes_pred(instr) || is_input(instr)) {
 		clear_cache(ctx, NULL);
 	} else {
 		/* invalidate only the necessary entries.. */
@@ -281,7 +287,7 @@ struct ir3_sched_notes {
 	/* there is at least one instruction that could be scheduled,
 	 * except for conflicting address/predicate register usage:
 	 */
-	bool addr_conflict, pred_conflict;
+	bool addr0_conflict, addr1_conflict, pred_conflict;
 };
 
 /* could an instruction be scheduled if specified ssa src was scheduled? */
@@ -314,11 +320,28 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 	 * TODO if any instructions use pred register and have other
 	 * src args, we would need to do the same for writes_pred()..
 	 */
-	if (writes_addr(instr)) {
+	if (writes_addr0(instr)) {
 		struct ir3 *ir = instr->block->shader;
 		bool ready = false;
-		for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) {
-			struct ir3_instruction *indirect = ir->indirects[i];
+		for (unsigned i = 0; (i < ir->a0_users_count) && !ready; i++) {
+			struct ir3_instruction *indirect = ir->a0_users[i];
 			if (!indirect)
 				continue;
 			if (indirect->address != instr)
 				continue;
 			ready = could_sched(indirect, instr);
 		}
 
 		/* nothing could be scheduled, so keep looking: */
 		if (!ready)
 			return false;
 	}
 
+	if (writes_addr1(instr)) {
+		struct ir3 *ir = instr->block->shader;
+		bool ready = false;
+		for (unsigned i = 0; (i < ir->a1_users_count) && !ready; i++) {
+			struct ir3_instruction *indirect = ir->a1_users[i];
+			if (!indirect)
+				continue;
+			if (indirect->address != instr)
@@ -335,9 +358,15 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 	 * register is currently in use, we need to defer until it is
 	 * free:
 	 */
-	if (writes_addr(instr) && ctx->addr) {
-		debug_assert(ctx->addr != instr);
-		notes->addr_conflict = true;
+	if (writes_addr0(instr) && ctx->addr0) {
+		debug_assert(ctx->addr0 != instr);
+		notes->addr0_conflict = true;
 		return false;
 	}
 
+	if (writes_addr1(instr) && ctx->addr1) {
+		debug_assert(ctx->addr1 != instr);
+		notes->addr1_conflict = true;
+		return false;
+	}
+
@@ -585,23 +614,21 @@ split_instr(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr)
 	return new_instr;
 }
 
-/* "spill" the address register by remapping any unscheduled
+/* "spill" the address registers by remapping any unscheduled
  * instructions which depend on the current address register
  * to a clone of the instruction which wrote the address reg.
  */
 static struct ir3_instruction *
-split_addr(struct ir3_sched_ctx *ctx)
+split_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction **addr,
+		struct ir3_instruction **users, unsigned users_count)
 {
-	struct ir3 *ir;
 	struct ir3_instruction *new_addr = NULL;
 	unsigned i;
 
-	debug_assert(ctx->addr);
-
-	ir = ctx->addr->block->shader;
-
-	for (i = 0; i < ir->indirects_count; i++) {
-		struct ir3_instruction *indirect = ir->indirects[i];
+	debug_assert(*addr);
+
+	for (i = 0; i < users_count; i++) {
+		struct ir3_instruction *indirect = users[i];
 
 		if (!indirect)
 			continue;
@@ -613,9 +640,9 @@ split_addr(struct ir3_sched_ctx *ctx)
 		/* remap remaining instructions using current addr
 		 * to new addr:
 		 */
-		if (indirect->address == ctx->addr) {
+		if (indirect->address == *addr) {
 			if (!new_addr) {
-				new_addr = split_instr(ctx, ctx->addr);
+				new_addr = split_instr(ctx, *addr);
 				/* original addr is scheduled, but new one isn't: */
 				new_addr->flags &= ~IR3_INSTR_MARK;
 			}
@@ -625,7 +652,7 @@ split_addr(struct ir3_sched_ctx *ctx)
 	}
 
 	/* all remaining indirects remapped to new addr: */
-	ctx->addr = NULL;
+	*addr = NULL;
 
 	return new_addr;
 }
@@ -682,7 +709,8 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
 	ctx->block = block;
 
 	/* addr/pred writes are per-block: */
-	ctx->addr = NULL;
+	ctx->addr0 = NULL;
+	ctx->addr1 = NULL;
 	ctx->pred = NULL;
 
 	/* move all instructions to the unscheduled list, and
@@ -740,14 +768,19 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
 			schedule(ctx, instr);
 		} else {
 			struct ir3_instruction *new_instr = NULL;
+			struct ir3 *ir = block->shader;
 
 			/* nothing available to schedule.. if we are blocked on
 			 * address/predicate register conflict, then break the
 			 * deadlock by cloning the instruction that wrote that
 			 * reg:
 			 */
-			if (notes.addr_conflict) {
-				new_instr = split_addr(ctx);
+			if (notes.addr0_conflict) {
+				new_instr = split_addr(ctx, &ctx->addr0,
+						ir->a0_users, ir->a0_users_count);
+			} else if (notes.addr1_conflict) {
+				new_instr = split_addr(ctx, &ctx->addr1,
+						ir->a1_users, ir->a1_users_count);
 			} else if (notes.pred_conflict) {
 				new_instr = split_pred(ctx);
 			} else {