diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index ee02b86e502..8d00446a2a2 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -815,12 +815,6 @@ intrinsic("ssbo_atomic_xor_ir3", src_comp=[1, 1, 1, 1], dest_comp=1, i
 intrinsic("ssbo_atomic_exchange_ir3", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[ACCESS])
 intrinsic("ssbo_atomic_comp_swap_ir3", src_comp=[1, 1, 1, 1, 1], dest_comp=1, indices=[ACCESS])
 
-# IR3-specific instruction for UBO loads using the ldc instruction. The second
-# source is the indirect offset, in units of four dwords. The base is a
-# component offset, in dword units.
-intrinsic("load_ubo_ir3", src_comp=[1, 1], bit_sizes=[32], dest_comp=0, indices=[BASE],
-          flags=[CAN_REORDER, CAN_ELIMINATE])
-
 # System values for freedreno geometry shaders.
 system_value("vs_primitive_stride_ir3", 1)
 system_value("vs_vertex_stride_ir3", 1)
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 04a2dd9cea4..8172e113c1a 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -754,7 +754,7 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 	struct ir3_instruction *ldc = ir3_LDC(b, idx, 0, offset, 0);
 	ldc->regs[0]->wrmask = MASK(ncomp);
 	ldc->cat6.iim_val = ncomp;
-	ldc->cat6.d = nir_intrinsic_base(intr);
+	ldc->cat6.d = nir_intrinsic_component(intr);
 	ldc->cat6.type = TYPE_U32;
 
 	ir3_handle_bindless_cat6(ldc, intr->src[0]);
@@ -1647,7 +1647,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	case nir_intrinsic_load_ubo:
 		emit_intrinsic_load_ubo(ctx, intr, dst);
 		break;
-	case nir_intrinsic_load_ubo_ir3:
+	case nir_intrinsic_load_ubo_vec4:
 		emit_intrinsic_load_ubo_ldc(ctx, intr, dst);
 		break;
 	case nir_intrinsic_load_frag_coord:
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 7b373dff242..bd73beefac9 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -461,6 +461,9 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
 	/* UBO offset lowering has to come after we've decided what will
 	 * be left as load_ubo
 	 */
+	if (so->shader->compiler->gpu_id >= 600)
+		OPT_V(s, nir_lower_ubo_vec4);
+
 	OPT_V(s, ir3_nir_lower_io_offsets, so->shader->compiler->gpu_id);
 
 	if (progress)
diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index 900768edb03..8a9503feb74 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@@ -327,8 +327,8 @@ instr_is_load_ubo(nir_instr *instr)
 
 	nir_intrinsic_op op = nir_instr_as_intrinsic(instr)->intrinsic;
 
-	/* ir3_nir_lower_io_offsets happens after this pass. */
-	assert(op != nir_intrinsic_load_ubo_ir3);
+	/* nir_lower_ubo_vec4 happens after this pass. */
+	assert(op != nir_intrinsic_load_ubo_vec4);
 
 	return op == nir_intrinsic_load_ubo;
 }
diff --git a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
index 36c48cf1299..110197d93b9 100644
--- a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
+++ b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
@@ -253,81 +253,6 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
 	return true;
 }
 
-static bool
-lower_offset_for_ubo(nir_intrinsic_instr *intrinsic, nir_builder *b, int gpu_id)
-{
-	/* We only need to lower offset if using LDC, which takes an offset in
-	 * vec4 units and has the start component baked into the instruction.
-	 */
-	if (gpu_id < 600)
-		return false;
-
-	/* TODO handle other bitsizes, including non-dword-aligned loads */
-	assert(intrinsic->dest.ssa.bit_size == 32);
-
-	b->cursor = nir_before_instr(&intrinsic->instr);
-
-	nir_intrinsic_instr *new_intrinsic =
-		nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo_ir3);
-
-	debug_assert(intrinsic->dest.is_ssa);
-	new_intrinsic->src[0] = intrinsic->src[0];
-
-	nir_ssa_def *offset = intrinsic->src[1].ssa;
-	nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, offset, -4);
-
-	if (!new_offset)
-		new_offset = nir_ushr(b, offset, nir_imm_int(b, 4));
-
-	new_intrinsic->src[1] = nir_src_for_ssa(new_offset);
-
-	unsigned align_mul = nir_intrinsic_align_mul(intrinsic);
-	unsigned align_offset = nir_intrinsic_align_offset(intrinsic);
-
-	unsigned components = intrinsic->num_components;
-
-	if (align_mul % 16 != 0)
-		components = 4;
-
-	new_intrinsic->num_components = components;
-
-	nir_ssa_dest_init(&new_intrinsic->instr, &new_intrinsic->dest,
-	                  components, 32, NULL);
-
-	nir_builder_instr_insert(b, &new_intrinsic->instr);
-
-	nir_ssa_def *new_dest;
-	if (align_mul % 16 == 0) {
-		/* We know that the low 4 bits of the offset are constant and equal to
-		 * align_offset. Use the component offset.
-		 */
-		unsigned component = align_offset / 4;
-		nir_intrinsic_set_base(new_intrinsic, component);
-		new_dest = &new_intrinsic->dest.ssa;
-	} else {
-		/* We have to assume it isn't aligned, and extract the components
-		 * dynamically.
-		 */
-		nir_intrinsic_set_base(new_intrinsic, 0);
-		nir_ssa_def *component =
-			nir_iand(b, nir_ushr(b, offset, nir_imm_int(b, 2)), nir_imm_int(b, 3));
-		nir_ssa_def *channels[NIR_MAX_VEC_COMPONENTS];
-		for (unsigned i = 0; i < intrinsic->num_components; i++) {
-			nir_ssa_def *idx = nir_iadd(b, nir_imm_int(b, i), component);
-			channels[i] = nir_vector_extract(b, &new_intrinsic->dest.ssa, idx);
-		}
-
-		new_dest = nir_vec(b, channels, intrinsic->num_components);
-	}
-
-	nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa,
-	                         nir_src_for_ssa(new_dest));
-
-	nir_instr_remove(&intrinsic->instr);
-
-	return true;
-}
-
 static bool
 lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx, int gpu_id)
 {
@@ -339,12 +264,6 @@ lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx, int gpu_
 
 		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 
-		/* UBO */
-		if (intr->intrinsic == nir_intrinsic_load_ubo) {
-			progress |= lower_offset_for_ubo(intr, b, gpu_id);
-			continue;
-		}
-
 		/* SSBO */
 		int ir3_intrinsic;
 		uint8_t offset_src_idx;
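
Note (not part of the patch above): the addressing that both the removed lower_offset_for_ubo() and the shared nir_lower_ubo_vec4 pass implement boils down to splitting a UBO byte offset into an offset in vec4 (16-byte) units plus a starting dword component, which is what the ir3 ldc instruction consumes (ldc->cat6.d above). Below is a minimal C sketch of that split for the dword-aligned case only; the struct and helper names are hypothetical and exist purely for illustration.

	/* Sketch only: how a UBO byte offset maps onto load_ubo_vec4 / ldc
	 * addressing. Hypothetical names, not code from this patch.
	 */
	struct ubo_vec4_addr {
		unsigned vec4_offset; /* offset source, in 16-byte (vec4) units */
		unsigned component;   /* COMPONENT index: starting dword in that vec4, 0..3 */
	};

	static inline struct ubo_vec4_addr
	ubo_byte_offset_to_vec4(unsigned byte_offset)
	{
		struct ubo_vec4_addr addr = {
			.vec4_offset = byte_offset >> 4,       /* divide by 16 */
			.component   = (byte_offset >> 2) & 3, /* dword within the vec4 */
		};
		return addr;
	}

For a non-constant offset the same shifts are emitted as NIR ALU instructions by the lowering pass, and when the load is not known to be 16-byte aligned the start component has to be selected dynamically, as the removed else-branch in lower_offset_for_ubo() did.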