From 8313016543758dc30c862e686f12a40f32b95f47 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sun, 19 Feb 2023 23:08:58 -0500 Subject: [PATCH] nir/lower_blend: Consume dual stores Now that we're working on lowered I/O, passing in the dual source blend colour via a sideband doesn't make any sense. The primary source blend colours are implicitly passed in as the sources of store_output intrinsics; likewise, we should get dual source blend colours from their respective stores. And since dual colours are only needed by blending, we can delete the stores as we go. That means nir_lower_blend now provides an all-in-one software lowering of dual source blending with no driver support needed! It even works for 8 dual-src render targets, but I don't have a use case for that. The only tricky bit here is making sure we are robust against different orders of store_output within the exit block. In particular, if we naively lower x = ... primary color = x y = ... dual color = y we end up emitting uses of y before it has been defined, something like x = ... primary color = blend(x, y) y = ... Instead, we remove dual stores and sink blend stores to the bottom of the block, so we end up with the correct x = ... y = ... primary color = blend(x, y) lower_io_to_temporaries ensures that the stores will be in the same (exit) block, so we don't need to sink further than that ourselves. Signed-off-by: Alyssa Rosenzweig Reviewed-by: Faith Ekstrand Part-of: --- src/compiler/nir/nir_lower_blend.c | 71 +++++++++++++++++++++++++++--- src/compiler/nir/nir_lower_blend.h | 2 - src/panfrost/lib/pan_blend.c | 56 ++++++++++------------- 3 files changed, 90 insertions(+), 39 deletions(-) diff --git a/src/compiler/nir/nir_lower_blend.c b/src/compiler/nir/nir_lower_blend.c index 5e5e9e4ccdf..d80241aafb7 100644 --- a/src/compiler/nir/nir_lower_blend.c +++ b/src/compiler/nir/nir_lower_blend.c @@ -36,6 +36,11 @@ #include "compiler/nir/nir_format_convert.h" #include "nir_lower_blend.h" +struct ctx { + const nir_lower_blend_options *options; + nir_ssa_def *src1[8]; +}; + /* Given processed factors, combine them per a blend function */ static nir_ssa_def * @@ -364,8 +369,10 @@ nir_blend( bconst = nir_load_blend_const_color_rgba(b); } - if (src->bit_size == 16) + if (src->bit_size == 16) { bconst = nir_f2f16(b, bconst); + src1 = nir_f2f16(b, src1); + } /* Fixed-point framebuffers require their inputs clamped. */ enum pipe_format format = options->format[rt]; @@ -464,7 +471,8 @@ nir_blend_replace_rt(const nir_lower_blend_rt *rt) static bool nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data) { - const nir_lower_blend_options *options = data; + struct ctx *ctx = data; + const nir_lower_blend_options *options = ctx->options; if (instr->type != nir_instr_type_intrinsic) return false; @@ -479,7 +487,16 @@ nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data) if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE) return false; - b->cursor = nir_before_instr(instr); + /* Only process stores once. Pass flags are cleared by consume_dual_stores */ + if (instr->pass_flags) + return false; + + instr->pass_flags = 1; + + /* Store are sunk to the bottom of the block to ensure that the dual + * source colour is already written. + */ + b->cursor = nir_after_block(instr->block); /* Grab the input color. We always want 4 channels during blend. Dead * code will clean up any channels we don't need. @@ -514,7 +531,7 @@ nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data) } else if (!util_format_is_pure_integer(options->format[rt]) && !nir_blend_replace_rt(&options->rt[rt])) { assert(!util_format_is_scaled(options->format[rt])); - blended = nir_blend(b, options, rt, src, options->src1, dst); + blended = nir_blend(b, options, rt, src, ctx->src1[rt], dst); } /* Apply a colormask if necessary */ @@ -534,6 +551,44 @@ nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data) /* Write out the final color instead of the input */ nir_instr_rewrite_src_ssa(instr, &store->src[0], blended); + + /* Sink to bottom */ + nir_instr_remove(instr); + nir_builder_instr_insert(b, instr); + return true; +} + +/* + * Dual-source colours are only for blending, so when nir_lower_blend is used, + * the dual source store_output is for us (only). Remove dual stores so the + * backend doesn't have to deal with them, collecting the sources for blending. + */ +static bool +consume_dual_stores(nir_builder *b, nir_instr *instr, void *data) +{ + nir_ssa_def **outputs = data; + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr); + if (store->intrinsic != nir_intrinsic_store_output) + return false; + + /* While we're here, clear the pass flags for store_outputs, since we'll set + * them later. + */ + instr->pass_flags = 0; + + nir_io_semantics sem = nir_intrinsic_io_semantics(store); + if (sem.dual_source_blend_index == 0) + return false; + + int rt = color_index_for_location(sem.location); + assert(rt >= 0 && rt < 8 && "bounds for dual-source blending"); + assert(store->src[0].is_ssa && "must be SSA"); + + outputs[rt] = store->src[0].ssa; + nir_instr_remove(instr); return true; } @@ -547,8 +602,14 @@ nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options) { assert(shader->info.stage == MESA_SHADER_FRAGMENT); + struct ctx ctx = {.options = options}; + nir_shader_instructions_pass(shader, consume_dual_stores, + nir_metadata_block_index | + nir_metadata_dominance, + ctx.src1); + nir_shader_instructions_pass(shader, nir_lower_blend_instr, nir_metadata_block_index | nir_metadata_dominance, - (void *)options); + &ctx); } diff --git a/src/compiler/nir/nir_lower_blend.h b/src/compiler/nir/nir_lower_blend.h index 50a46ce7a72..aaf45664125 100644 --- a/src/compiler/nir/nir_lower_blend.h +++ b/src/compiler/nir/nir_lower_blend.h @@ -57,8 +57,6 @@ typedef struct { bool logicop_enable; unsigned logicop_func; - nir_ssa_def *src1; - /* If set, will use load_blend_const_color_{r,g,b,a}_float instead of * load_blend_const_color_rgba */ bool scalar_blend_const; diff --git a/src/panfrost/lib/pan_blend.c b/src/panfrost/lib/pan_blend.c index 865919fda13..cda49b9aa9d 100644 --- a/src/panfrost/lib/pan_blend.c +++ b/src/panfrost/lib/pan_blend.c @@ -663,49 +663,41 @@ GENX(pan_blend_create_shader)(const struct panfrost_device *dev, rt_state->equation.alpha_invert_dst_factor; } - nir_alu_type src_types[] = {src0_type ?: nir_type_float32, - src1_type ?: nir_type_float32}; - - /* HACK: workaround buggy TGSI shaders (u_blitter) */ - for (unsigned i = 0; i < ARRAY_SIZE(src_types); ++i) { - src_types[i] = nir_alu_type_get_base_type(nir_type) | - nir_alu_type_get_type_size(src_types[i]); - } - nir_ssa_def *pixel = nir_load_barycentric_pixel(&b, 32, .interp_mode = 1); nir_ssa_def *zero = nir_imm_int(&b, 0); - nir_ssa_def *s_src[2]; for (unsigned i = 0; i < 2; ++i) { - s_src[i] = nir_load_interpolated_input( - &b, 4, nir_alu_type_get_type_size(src_types[i]), pixel, zero, - .io_semantics.location = i ? VARYING_SLOT_VAR0 : VARYING_SLOT_COL0, - .io_semantics.num_slots = 1, .base = i, .dest_type = src_types[i]); - } + nir_alu_type src_type = + (i == 1 ? src1_type : src0_type) ?: nir_type_float32; - /* On Midgard, the blend shader is responsible for format conversion. - * As the OpenGL spec requires integer conversions to saturate, we must - * saturate ourselves here. On Bifrost and later, the conversion - * hardware handles this automatically. - */ - for (int i = 0; i < ARRAY_SIZE(s_src); ++i) { + /* HACK: workaround buggy TGSI shaders (u_blitter) */ + src_type = nir_alu_type_get_base_type(nir_type) | + nir_alu_type_get_type_size(src_type); + + nir_ssa_def *src = nir_load_interpolated_input( + &b, 4, nir_alu_type_get_type_size(src_type), pixel, zero, + .io_semantics.location = i ? VARYING_SLOT_VAR0 : VARYING_SLOT_COL0, + .io_semantics.num_slots = 1, .base = i, .dest_type = src_type); + + /* On Midgard, the blend shader is responsible for format conversion. + * As the OpenGL spec requires integer conversions to saturate, we must + * saturate ourselves here. On Bifrost and later, the conversion + * hardware handles this automatically. + */ nir_alu_type T = nir_alu_type_get_base_type(nir_type); bool should_saturate = (PAN_ARCH <= 5) && (T != nir_type_float); - s_src[i] = - nir_convert_with_rounding(&b, s_src[i], src_types[i], nir_type, - nir_rounding_mode_undef, should_saturate); + src = nir_convert_with_rounding(&b, src, T, nir_type, + nir_rounding_mode_undef, should_saturate); + + nir_store_output(&b, src, zero, .write_mask = BITFIELD_MASK(4), + .src_type = nir_type, + .io_semantics.location = FRAG_RESULT_DATA0 + rt, + .io_semantics.num_slots = 1, + .io_semantics.dual_source_blend_index = i); } - /* Build a trivial blend shader */ - nir_store_output(&b, s_src[0], zero, .write_mask = BITFIELD_MASK(4), - .src_type = nir_type, - .io_semantics.location = FRAG_RESULT_DATA0 + rt, - .io_semantics.num_slots = 1); - b.shader->info.io_lowered = true; - options.src1 = s_src[1]; - NIR_PASS_V(b.shader, nir_lower_blend, &options); nir_shader_instructions_pass( b.shader, pan_inline_blend_constants,