diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 8f568d4779a..671f3dbd2ab 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -2014,10 +2014,12 @@ emit_store_output_gs(struct v3d_compile *c, nir_intrinsic_instr *instr)
 {
         assert(instr->num_components == 1);

+        struct qreg offset = ntq_get_src(c, instr->src[1], 0);
+
         uint32_t base_offset = nir_intrinsic_base(instr);
-        struct qreg src_offset = ntq_get_src(c, instr->src[1], 0);
-        struct qreg offset =
-                vir_ADD(c, vir_uniform_ui(c, base_offset), src_offset);
+
+        if (base_offset)
+                offset = vir_ADD(c, vir_uniform_ui(c, base_offset), offset);

         /* Usually, for VS or FS, we only emit outputs once at program end so
          * our VPM writes are never in non-uniform control flow, but this
diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c
index 2d3d307ef2f..d35061d33a1 100644
--- a/src/broadcom/compiler/v3d_nir_lower_io.c
+++ b/src/broadcom/compiler/v3d_nir_lower_io.c
@@ -81,10 +81,17 @@ v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
         intr->num_components = 1;

         intr->src[0] = nir_src_for_ssa(chan);
-        if (offset)
-                intr->src[1] = nir_src_for_ssa(offset);
-        else
+        if (offset) {
+                /* When generating the VIR instruction, the base and the offset
+                 * are just going to get added together with an ADD instruction,
+                 * so we might as well do the add here at the NIR level instead
+                 * and let constant folding do its magic.
+                 */
+                intr->src[1] = nir_src_for_ssa(nir_iadd_imm(b, offset, base));
+                base = 0;
+        } else {
                 intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+        }

         nir_intrinsic_set_base(intr, base);
         nir_intrinsic_set_write_mask(intr, 0x1);
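
The two hunks work together: the NIR lowering folds the constant base into the
offset up front (so nir_iadd_imm() can constant-fold away entirely when the
offset is itself a constant), and the VIR backend then only emits an ADD when a
non-zero base actually survives to code generation. The sketch below is a
minimal, self-contained model of that pattern in plain C, not the Mesa API:
struct value, build_iadd_imm() and emit_store() are hypothetical stand-ins for
NIR SSA values, nir_iadd_imm() and emit_store_output_gs().

/* Toy model of the folding above (hypothetical helper names, not Mesa code).
 * build_iadd_imm() plays the role of nir_iadd_imm(), folding "value + imm"
 * when the value is already a known constant; emit_store() plays the role of
 * the VIR backend, which only emits an ADD for a non-zero leftover base.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct value {
        bool is_const;      /* known at compile time? */
        uint32_t imm;       /* constant payload when is_const is set */
};

/* Fold a constant immediate into a value at IR-build time: a constant
 * operand folds on the spot, so no add ever reaches the backend.
 */
static struct value
build_iadd_imm(struct value v, uint32_t imm)
{
        if (v.is_const) {
                v.imm += imm;        /* constant folding does its magic */
                return v;
        }
        if (imm == 0)
                return v;            /* adding 0 is a no-op */
        printf("  emit: ADD off, off, %u\n", imm);
        return v;
}

/* Backend side, mirroring the nir_to_vir.c hunk: emit the ADD only when a
 * non-zero base is left over after the NIR-level fold.
 */
static void
emit_store(uint32_t base, struct value offset)
{
        if (base)
                printf("  emit: ADD off, %u, off\n", base);
        if (offset.is_const)
                printf("  emit: STORE @%u\n", offset.imm);
        else
                printf("  emit: STORE @off\n");
}

int
main(void)
{
        /* Constant offset: base 16 folds into it, no ADD is emitted. */
        struct value c = { .is_const = true, .imm = 4 };
        puts("constant offset 4, base 16:");
        emit_store(0, build_iadd_imm(c, 16));

        /* Dynamic offset: one ADD is still emitted, but only one. */
        struct value d = { .is_const = false };
        puts("dynamic offset, base 16:");
        emit_store(0, build_iadd_imm(d, 16));
        return 0;
}

Running the sketch prints no ADD for the constant-offset case and a single ADD
for the dynamic one, which mirrors the instruction-count win the patch is
after: do the add where the optimizer can see both operands, and keep the
backend's ADD strictly conditional.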