freedreno/ir3: Drop wrmask for ir3 local and global store intrinsics
These intrinsics are supposed to map directly to the underlying hardware instructions, which have no write mask (wrmask). We use them when we lower store_output in the geometry pipeline, and since store_output is lowered to temps first, we always see full wrmasks there.
This commit is contained in:

committed by
Rob Clark

parent
4627bfcd69
commit
14969aab11
@@ -836,7 +836,7 @@ intrinsic("end_patch_ir3")
|
|||||||
# between geometry stages - perhaps it's explicit access to the vertex cache.
|
# between geometry stages - perhaps it's explicit access to the vertex cache.
|
||||||
|
|
||||||
# src[] = { value, offset }.
|
# src[] = { value, offset }.
|
||||||
store("shared_ir3", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
|
store("shared_ir3", 2, [BASE, ALIGN_MUL, ALIGN_OFFSET])
|
||||||
# src[] = { offset }.
|
# src[] = { offset }.
|
||||||
load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
|
load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
|
||||||
|
|
||||||
@@ -846,7 +846,7 @@ load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
|
|||||||
|
|
||||||
# src[] = { value, address(vec2 of hi+lo uint32_t), offset }.
|
# src[] = { value, address(vec2 of hi+lo uint32_t), offset }.
|
||||||
# const_index[] = { write_mask, align_mul, align_offset }
|
# const_index[] = { access, align_mul, align_offset }
|
||||||
intrinsic("store_global_ir3", [0, 2, 1], indices=[WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
|
intrinsic("store_global_ir3", [0, 2, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET])
|
||||||
# src[] = { address(vec2 of hi+lo uint32_t), offset }.
|
# src[] = { address(vec2 of hi+lo uint32_t), offset }.
|
||||||
# const_index[] = { access, align_mul, align_offset }
|
# const_index[] = { access, align_mul, align_offset }
|
||||||
intrinsic("load_global_ir3", [2, 1], dest_comp=0, indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
|
intrinsic("load_global_ir3", [2, 1], dest_comp=0, indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
|
||||||
|
@@ -939,48 +939,27 @@ emit_intrinsic_load_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *int
|
|||||||
ir3_split_dest(b, dst, load, 0, intr->num_components);
|
ir3_split_dest(b, dst, load, 0, intr->num_components);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* src[] = { value, offset }. const_index[] = { base, write_mask } */
|
/* src[] = { value, offset }. const_index[] = { base } */
|
||||||
static void
|
static void
|
||||||
emit_intrinsic_store_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
emit_intrinsic_store_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||||
{
|
{
|
||||||
struct ir3_block *b = ctx->block;
|
struct ir3_block *b = ctx->block;
|
||||||
struct ir3_instruction *store, *offset;
|
struct ir3_instruction *store, *offset;
|
||||||
struct ir3_instruction * const *value;
|
struct ir3_instruction * const *value;
|
||||||
unsigned base, wrmask;
|
|
||||||
|
|
||||||
value = ir3_get_src(ctx, &intr->src[0]);
|
value = ir3_get_src(ctx, &intr->src[0]);
|
||||||
offset = ir3_get_src(ctx, &intr->src[1])[0];
|
offset = ir3_get_src(ctx, &intr->src[1])[0];
|
||||||
|
|
||||||
base = nir_intrinsic_base(intr);
|
|
||||||
wrmask = nir_intrinsic_write_mask(intr);
|
|
||||||
|
|
||||||
/* Combine groups of consecutive enabled channels in one write
|
|
||||||
* message. We use ffs to find the first enabled channel and then ffs on
|
|
||||||
* the bit-inverse, down-shifted writemask to determine the length of
|
|
||||||
* the block of enabled bits.
|
|
||||||
*
|
|
||||||
* (trick stolen from i965's fs_visitor::nir_emit_cs_intrinsic())
|
|
||||||
*/
|
|
||||||
while (wrmask) {
|
|
||||||
unsigned first_component = ffs(wrmask) - 1;
|
|
||||||
unsigned length = ffs(~(wrmask >> first_component)) - 1;
|
|
||||||
|
|
||||||
store = ir3_STLW(b, offset, 0,
|
store = ir3_STLW(b, offset, 0,
|
||||||
ir3_create_collect(ctx, &value[first_component], length), 0,
|
ir3_create_collect(ctx, value, intr->num_components), 0,
|
||||||
create_immed(b, length), 0);
|
create_immed(b, intr->num_components), 0);
|
||||||
|
|
||||||
store->cat6.dst_offset = first_component + base;
|
store->cat6.dst_offset = nir_intrinsic_base(intr);
|
||||||
store->cat6.type = utype_src(intr->src[0]);
|
store->cat6.type = utype_src(intr->src[0]);
|
||||||
store->barrier_class = IR3_BARRIER_SHARED_W;
|
store->barrier_class = IR3_BARRIER_SHARED_W;
|
||||||
store->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W;
|
store->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W;
|
||||||
|
|
||||||
array_insert(b, b->keeps, store);
|
array_insert(b, b->keeps, store);
|
||||||
|
|
||||||
/* Clear the bits in the writemask that we just wrote, then try
|
|
||||||
* again to see if more channels are left.
|
|
||||||
*/
|
|
||||||
wrmask &= (15 << (first_component + length));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@@ -191,6 +191,13 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *s
|
|||||||
case nir_intrinsic_store_output: {
|
case nir_intrinsic_store_output: {
|
||||||
// src[] = { value, offset }.
|
// src[] = { value, offset }.
|
||||||
|
|
||||||
|
/* nir_lower_io_to_temporaries replaces all accesses to output
|
||||||
|
* variables with temp variables and then emits a nir_copy_var at
|
||||||
|
* the end of the shader. Thus, we should always get a full wrmask
|
||||||
|
* here.
|
||||||
|
*/
|
||||||
|
assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
|
||||||
|
|
||||||
b->cursor = nir_instr_remove(&intr->instr);
|
b->cursor = nir_instr_remove(&intr->instr);
|
||||||
|
|
||||||
nir_ssa_def *vertex_id = build_vertex_id(b, state);
|
nir_ssa_def *vertex_id = build_vertex_id(b, state);
|
||||||
@@ -199,10 +206,8 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *s
|
|||||||
nir_intrinsic_instr *store =
|
nir_intrinsic_instr *store =
|
||||||
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
|
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
|
||||||
|
|
||||||
nir_intrinsic_set_write_mask(store, MASK(intr->num_components));
|
|
||||||
store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
|
store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
|
||||||
store->src[1] = nir_src_for_ssa(offset);
|
store->src[1] = nir_src_for_ssa(offset);
|
||||||
|
|
||||||
store->num_components = intr->num_components;
|
store->num_components = intr->num_components;
|
||||||
|
|
||||||
nir_builder_instr_insert(b, &store->instr);
|
nir_builder_instr_insert(b, &store->instr);
|
||||||
@@ -431,18 +436,22 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
|
|||||||
|
|
||||||
b->cursor = nir_before_instr(&intr->instr);
|
b->cursor = nir_before_instr(&intr->instr);
|
||||||
|
|
||||||
|
/* nir_lower_io_to_temporaries replaces all accesses to output
|
||||||
|
* variables with temp variables and then emits a nir_copy_var at
|
||||||
|
* the end of the shader. Thus, we should always get a full wrmask
|
||||||
|
* here.
|
||||||
|
*/
|
||||||
|
assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
|
||||||
|
|
||||||
nir_ssa_def *value = intr->src[0].ssa;
|
nir_ssa_def *value = intr->src[0].ssa;
|
||||||
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
|
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
|
||||||
nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
|
nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
|
||||||
nir_ssa_def *offset = build_per_vertex_offset(b, state,
|
nir_ssa_def *offset = build_per_vertex_offset(b, state,
|
||||||
intr->src[1].ssa, intr->src[2].ssa, var);
|
intr->src[1].ssa, intr->src[2].ssa, var);
|
||||||
|
|
||||||
nir_intrinsic_instr *store =
|
|
||||||
replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
|
replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
|
||||||
nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
|
nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
|
||||||
|
|
||||||
nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -503,11 +512,15 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
|
|||||||
|
|
||||||
debug_assert(nir_intrinsic_component(intr) == 0);
|
debug_assert(nir_intrinsic_component(intr) == 0);
|
||||||
|
|
||||||
nir_intrinsic_instr *store =
|
/* nir_lower_io_to_temporaries replaces all accesses to output
|
||||||
|
* variables with temp variables and then emits a nir_copy_var at
|
||||||
|
* the end of the shader. Thus, we should always get a full wrmask
|
||||||
|
* here.
|
||||||
|
*/
|
||||||
|
assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
|
||||||
|
|
||||||
replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
|
replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
|
||||||
intr->src[0].ssa, address, offset);
|
intr->src[0].ssa, address, offset);
|
||||||
|
|
||||||
nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -559,7 +572,6 @@ emit_tess_epilouge(nir_builder *b, struct state *state)
|
|||||||
store->src[2] = nir_src_for_ssa(offset);
|
store->src[2] = nir_src_for_ssa(offset);
|
||||||
nir_builder_instr_insert(b, &store->instr);
|
nir_builder_instr_insert(b, &store->instr);
|
||||||
store->num_components = levels[0]->num_components;
|
store->num_components = levels[0]->num_components;
|
||||||
nir_intrinsic_set_write_mask(store, (1 << levels[0]->num_components) - 1);
|
|
||||||
|
|
||||||
if (levels[1]) {
|
if (levels[1]) {
|
||||||
store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
|
store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
|
||||||
@@ -570,7 +582,6 @@ emit_tess_epilouge(nir_builder *b, struct state *state)
|
|||||||
store->src[2] = nir_src_for_ssa(offset);
|
store->src[2] = nir_src_for_ssa(offset);
|
||||||
nir_builder_instr_insert(b, &store->instr);
|
nir_builder_instr_insert(b, &store->instr);
|
||||||
store->num_components = levels[1]->num_components;
|
store->num_components = levels[1]->num_components;
|
||||||
nir_intrinsic_set_write_mask(store, (1 << levels[1]->num_components) - 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Finally, Insert endpatch instruction:
|
/* Finally, Insert endpatch instruction:
|
||||||
|
Reference in New Issue
Block a user