freedreno/ir3: Drop wrmask for ir3 local and global store intrinsics

These intrinsics are supposed to map to the underlying hardware
instructions, which don't have a wrmask. We use them when we lower
store_output in the geometry pipeline, and since store_output gets
lowered to temps, we always see full wrmasks there.
This commit is contained in:
Kristian H. Kristensen
2020-05-13 13:19:57 -07:00
committed by Rob Clark
parent 4627bfcd69
commit 14969aab11
3 changed files with 35 additions and 45 deletions

View File

@@ -836,7 +836,7 @@ intrinsic("end_patch_ir3")
 # between geometry stages - perhaps it's explicit access to the vertex cache.
 # src[] = { value, offset }.
-store("shared_ir3", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
+store("shared_ir3", 2, [BASE, ALIGN_MUL, ALIGN_OFFSET])
 # src[] = { offset }.
 load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
@@ -846,7 +846,7 @@ load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
 # src[] = { value, address(vec2 of hi+lo uint32_t), offset }.
 # const_index[] = { write_mask, align_mul, align_offset }
-intrinsic("store_global_ir3", [0, 2, 1], indices=[WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
+intrinsic("store_global_ir3", [0, 2, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET])
 # src[] = { address(vec2 of hi+lo uint32_t), offset }.
 # const_index[] = { access, align_mul, align_offset }
 intrinsic("load_global_ir3", [2, 1], dest_comp=0, indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])

View File

@@ -939,48 +939,27 @@ emit_intrinsic_load_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *int
 	ir3_split_dest(b, dst, load, 0, intr->num_components);
 }
 
-/* src[] = { value, offset }. const_index[] = { base, write_mask } */
+/* src[] = { value, offset }. const_index[] = { base } */
 static void
 emit_intrinsic_store_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 {
 	struct ir3_block *b = ctx->block;
 	struct ir3_instruction *store, *offset;
 	struct ir3_instruction * const *value;
-	unsigned base, wrmask;
 
 	value = ir3_get_src(ctx, &intr->src[0]);
 	offset = ir3_get_src(ctx, &intr->src[1])[0];
 
-	base = nir_intrinsic_base(intr);
-	wrmask = nir_intrinsic_write_mask(intr);
-
-	/* Combine groups of consecutive enabled channels in one write
-	 * message. We use ffs to find the first enabled channel and then ffs on
-	 * the bit-inverse, down-shifted writemask to determine the length of
-	 * the block of enabled bits.
-	 *
-	 * (trick stolen from i965's fs_visitor::nir_emit_cs_intrinsic())
-	 */
-	while (wrmask) {
-		unsigned first_component = ffs(wrmask) - 1;
-		unsigned length = ffs(~(wrmask >> first_component)) - 1;
-
-		store = ir3_STLW(b, offset, 0,
-			ir3_create_collect(ctx, &value[first_component], length), 0,
-			create_immed(b, length), 0);
-
-		store->cat6.dst_offset = first_component + base;
+	store = ir3_STLW(b, offset, 0,
+		ir3_create_collect(ctx, value, intr->num_components), 0,
+		create_immed(b, intr->num_components), 0);
+
+	store->cat6.dst_offset = nir_intrinsic_base(intr);
 	store->cat6.type = utype_src(intr->src[0]);
 	store->barrier_class = IR3_BARRIER_SHARED_W;
 	store->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W;
 
 	array_insert(b, b->keeps, store);
-
-		/* Clear the bits in the writemask that we just wrote, then try
-		 * again to see if more channels are left.
-		 */
-		wrmask &= (15 << (first_component + length));
-	}
 }
/* /*

View File

@@ -191,6 +191,13 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *s
 	case nir_intrinsic_store_output: {
 		// src[] = { value, offset }.
 
+		/* nir_lower_io_to_temporaries replaces all access to output
+		 * variables with temp variables and then emits a nir_copy_var at
+		 * the end of the shader. Thus, we should always get a full wrmask
+		 * here.
+		 */
+		assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
 		b->cursor = nir_instr_remove(&intr->instr);
 
 		nir_ssa_def *vertex_id = build_vertex_id(b, state);
@@ -199,10 +206,8 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *s
 		nir_intrinsic_instr *store =
 			nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
 
-		nir_intrinsic_set_write_mask(store, MASK(intr->num_components));
 		store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
 		store->src[1] = nir_src_for_ssa(offset);
 		store->num_components = intr->num_components;
 		nir_builder_instr_insert(b, &store->instr);
@@ -431,18 +436,22 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 			b->cursor = nir_before_instr(&intr->instr);
 
+			/* nir_lower_io_to_temporaries replaces all access to output
+			 * variables with temp variables and then emits a nir_copy_var at
+			 * the end of the shader. Thus, we should always get a full wrmask
+			 * here.
+			 */
+			assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
 			nir_ssa_def *value = intr->src[0].ssa;
 			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
 			nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
 			nir_ssa_def *offset = build_per_vertex_offset(b, state,
 					intr->src[1].ssa, intr->src[2].ssa, var);
 
-			nir_intrinsic_instr *store =
-				replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
-						nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
-			nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+			replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
+					nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
 
 			break;
 		}
@@ -503,11 +512,15 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 			debug_assert(nir_intrinsic_component(intr) == 0);
 
-			nir_intrinsic_instr *store =
-				replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
-						intr->src[0].ssa, address, offset);
-			nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+			/* nir_lower_io_to_temporaries replaces all access to output
+			 * variables with temp variables and then emits a nir_copy_var at
+			 * the end of the shader. Thus, we should always get a full wrmask
+			 * here.
+			 */
+			assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
+			replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
+					intr->src[0].ssa, address, offset);
 		}
 		break;
 	}
@@ -559,7 +572,6 @@ emit_tess_epilouge(nir_builder *b, struct state *state)
 	store->src[2] = nir_src_for_ssa(offset);
 	nir_builder_instr_insert(b, &store->instr);
 	store->num_components = levels[0]->num_components;
-	nir_intrinsic_set_write_mask(store, (1 << levels[0]->num_components) - 1);
 
 	if (levels[1]) {
 		store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
@@ -570,7 +582,6 @@ emit_tess_epilouge(nir_builder *b, struct state *state)
 		store->src[2] = nir_src_for_ssa(offset);
 		nir_builder_instr_insert(b, &store->instr);
 		store->num_components = levels[1]->num_components;
-		nir_intrinsic_set_write_mask(store, (1 << levels[1]->num_components) - 1);
 	}
 
 	/* Finally, Insert endpatch instruction: