From d1d3ebb48cc693d66b064871c1118c498b6667c0 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Thu, 18 Nov 2021 20:53:18 -0800 Subject: [PATCH] lima: implement dual source blend It was a bit trickier to reverse-engineer, since the blob doesn't expose this functionality at all. However, we had a clue from the very beginning: lima_blend_factor is 3 bits, i.e. 8 values, but only 5 of them were used; it was just waiting until someone tried what the 3 unused values do. Interestingly enough, it turns out "5" works just like "0" (which is PIPE_BLENDFACTOR_*SRC_*), but only if the output register for gl_FragColor is $0. So it looks suspiciously similar to PIPE_BLENDFACTOR_*SRC1_* behavior, and it looks like the secondary output is taken from $0. Since output regs for all other outputs are configured via RSW, there must be a field in RSW for the output register of the secondary color; it's likely 4 bits wide and it's currently set to 0 for reg $0. Then it was just a matter of brute-forcing various consecutive 4 bits in RSW - and indeed, setting the top 4 bits of rsw->aux0 to the index of the gl_FragColor output register fixes blending tests when we use the "5" blend factor instead of "0". So it must be the register number for gl_SecondaryFragColor. Unlike the gl_FragColor register, this field appears only once in RSW. Wire it up in the compiler, and piglit arb_blend_func_extended now passes. 
Reviewed-by: Andreas Baierl Reviewed-by: Erico Nunes Signed-off-by: Vasily Khoruzhick Part-of: --- docs/features.txt | 2 +- src/gallium/drivers/lima/ir/pp/nir.c | 4 +- src/gallium/drivers/lima/ir/pp/ppir.h | 14 ++- src/gallium/drivers/lima/ir/pp/regalloc.c | 9 +- src/gallium/drivers/lima/lima_context.h | 3 +- src/gallium/drivers/lima/lima_draw.c | 122 ++++++---------------- src/gallium/drivers/lima/lima_parser.c | 5 + src/gallium/drivers/lima/lima_parser.h | 10 +- src/gallium/drivers/lima/lima_screen.c | 3 + 9 files changed, 68 insertions(+), 104 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index bc57eba78ee..458c2d79815 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -99,7 +99,7 @@ GL 3.2, GLSL 1.50 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llv GL 3.3, GLSL 3.30 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, virgl, zink, d3d12 - GL_ARB_blend_func_extended DONE (freedreno/a3xx, freedreno/a6xx, swr, panfrost) + GL_ARB_blend_func_extended DONE (freedreno/a3xx, freedreno/a6xx, swr, panfrost, lima) GL_ARB_explicit_attrib_location DONE (all drivers that support GLSL) GL_ARB_occlusion_query2 DONE (swr, v3d, vc4, panfrost, lima) GL_ARB_sampler_objects DONE (all drivers) diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c index fc8030fc745..1d58708ae70 100644 --- a/src/gallium/drivers/lima/ir/pp/nir.c +++ b/src/gallium/drivers/lima/ir/pp/nir.c @@ -351,7 +351,8 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) nir_io_semantics io = nir_intrinsic_io_semantics(instr); unsigned offset = nir_src_as_uint(instr->src[1]); unsigned slot = io.location + offset; - ppir_output_type out_type = ppir_nir_output_to_ppir(slot); + ppir_output_type out_type = ppir_nir_output_to_ppir(slot, + block->comp->dual_source_blend ? 
io.dual_source_blend_index : 0); if (out_type == ppir_output_invalid) { ppir_debug("Unsupported output type: %d\n", slot); return false; @@ -916,6 +917,7 @@ bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *n comp->ra = ra; comp->uses_discard = nir->info.fs.uses_discard; + comp->dual_source_blend = nir->info.fs.color_is_dual_source; /* 1st pass: create ppir blocks */ nir_foreach_function(function, nir) { diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h index f434786f4cf..70b95590697 100644 --- a/src/gallium/drivers/lima/ir/pp/ppir.h +++ b/src/gallium/drivers/lima/ir/pp/ppir.h @@ -180,7 +180,8 @@ typedef enum { } ppir_pipeline; typedef enum { - ppir_output_color, + ppir_output_color0, + ppir_output_color1, ppir_output_depth, ppir_output_num, ppir_output_invalid = -1, @@ -189,8 +190,10 @@ typedef enum { static inline const char *ppir_output_type_to_str(ppir_output_type type) { switch (type) { - case ppir_output_color: - return "OUTPUT_COLOR"; + case ppir_output_color0: + return "OUTPUT_COLOR0"; + case ppir_output_color1: + return "OUTPUT_COLOR1"; case ppir_output_depth: return "OUTPUT_DEPTH"; default: @@ -198,12 +201,12 @@ static inline const char *ppir_output_type_to_str(ppir_output_type type) } } -static inline ppir_output_type ppir_nir_output_to_ppir(gl_frag_result res) +static inline ppir_output_type ppir_nir_output_to_ppir(gl_frag_result res, int dual_src_index) { switch (res) { case FRAG_RESULT_COLOR: case FRAG_RESULT_DATA0: - return ppir_output_color; + return ppir_output_color0 + dual_src_index; case FRAG_RESULT_DEPTH: return ppir_output_depth; default: @@ -417,6 +420,7 @@ typedef struct ppir_compiler { struct ra_regs *ra; struct lima_fs_compiled_shader *prog; bool uses_discard; + bool dual_source_blend; /* for scheduler */ int sched_instr_base; diff --git a/src/gallium/drivers/lima/ir/pp/regalloc.c b/src/gallium/drivers/lima/ir/pp/regalloc.c index 37b54b5a4bf..08cc3e76c85 100644 --- 
a/src/gallium/drivers/lima/ir/pp/regalloc.c +++ b/src/gallium/drivers/lima/ir/pp/regalloc.c @@ -617,7 +617,8 @@ bool ppir_regalloc_prog(ppir_compiler *comp) /* No registers? Probably shader consists of discard instruction */ if (list_is_empty(&comp->reg_list)) { - comp->prog->state.frag_color_reg = 0; + comp->prog->state.frag_color0_reg = 0; + comp->prog->state.frag_color1_reg = -1; comp->prog->state.frag_depth_reg = -1; return true; } @@ -628,8 +629,10 @@ bool ppir_regalloc_prog(ppir_compiler *comp) if (!spilled) return false; - comp->prog->state.frag_color_reg = - comp->out_type_to_reg[ppir_output_color]; + comp->prog->state.frag_color0_reg = + comp->out_type_to_reg[ppir_output_color0]; + comp->prog->state.frag_color1_reg = + comp->out_type_to_reg[ppir_output_color1]; comp->prog->state.frag_depth_reg = comp->out_type_to_reg[ppir_output_depth]; diff --git a/src/gallium/drivers/lima/lima_context.h b/src/gallium/drivers/lima/lima_context.h index bea1ddeb477..3adf67ce3c4 100644 --- a/src/gallium/drivers/lima/lima_context.h +++ b/src/gallium/drivers/lima/lima_context.h @@ -49,7 +49,8 @@ struct lima_fs_compiled_shader { struct { int shader_size; int stack_size; - int frag_color_reg; + int frag_color0_reg; + int frag_color1_reg; int frag_depth_reg; bool uses_discard; } state; diff --git a/src/gallium/drivers/lima/lima_draw.c b/src/gallium/drivers/lima/lima_draw.c index 889f0192179..bde83281e14 100644 --- a/src/gallium/drivers/lima/lima_draw.c +++ b/src/gallium/drivers/lima/lima_draw.c @@ -449,107 +449,56 @@ lima_blend_func(enum pipe_blend_func pipe) return -1; } -static int -lima_blend_factor_has_alpha(enum pipe_blendfactor pipe) -{ - /* Bit 4 is set if the blendfactor uses alpha */ - switch (pipe) { - case PIPE_BLENDFACTOR_SRC_ALPHA: - case PIPE_BLENDFACTOR_DST_ALPHA: - case PIPE_BLENDFACTOR_CONST_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return 1; - - case PIPE_BLENDFACTOR_SRC_COLOR: 
- case PIPE_BLENDFACTOR_INV_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_INV_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - case PIPE_BLENDFACTOR_ZERO: - case PIPE_BLENDFACTOR_ONE: - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return 0; - - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return -1; /* not supported */ - } - return -1; -} - -static int -lima_blend_factor_is_inv(enum pipe_blendfactor pipe) -{ - /* Bit 3 is set if the blendfactor type is inverted */ - switch (pipe) { - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - case PIPE_BLENDFACTOR_INV_DST_COLOR: - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - case PIPE_BLENDFACTOR_ONE: - return 1; - - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_SRC_ALPHA: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_DST_ALPHA: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_CONST_ALPHA: - case PIPE_BLENDFACTOR_ZERO: - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return 0; - - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return -1; /* not supported */ - } - return -1; -} - static int lima_blend_factor(enum pipe_blendfactor pipe) { - /* Bits 0-2 indicate the blendfactor type */ + /* Bits 0-2 indicate the blendfactor type, + * Bit 3 is set if blendfactor is inverted + * Bit 4 is set if blendfactor has alpha */ switch (pipe) { case PIPE_BLENDFACTOR_SRC_COLOR: + return 0 << 4 | 0 << 3 | 0; case PIPE_BLENDFACTOR_SRC_ALPHA: + return 1 << 4 | 0 << 3 | 0; case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return 0 << 4 | 1 << 3 | 0; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return 0; + return 1 << 4 | 1 << 3 | 0; case 
PIPE_BLENDFACTOR_DST_COLOR: + return 0 << 4 | 0 << 3 | 1; case PIPE_BLENDFACTOR_DST_ALPHA: + return 1 << 4 | 0 << 3 | 1; case PIPE_BLENDFACTOR_INV_DST_COLOR: + return 0 << 4 | 1 << 3 | 1; case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return 1; + return 1 << 4 | 1 << 3 | 1; case PIPE_BLENDFACTOR_CONST_COLOR: + return 0 << 4 | 0 << 3 | 2; case PIPE_BLENDFACTOR_CONST_ALPHA: + return 1 << 4 | 0 << 3 | 2; case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return 0 << 4 | 1 << 3 | 2; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return 2; + return 1 << 4 | 1 << 3 | 2; case PIPE_BLENDFACTOR_ZERO: + return 0 << 4 | 0 << 3 | 3; case PIPE_BLENDFACTOR_ONE: - return 3; + return 0 << 4 | 1 << 3 | 3; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return 4; + return 0 << 4 | 0 << 3 | 4; case PIPE_BLENDFACTOR_SRC1_COLOR: + return 0 << 4 | 0 << 3 | 5; case PIPE_BLENDFACTOR_SRC1_ALPHA: + return 1 << 4 | 0 << 3 | 5; case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return 0 << 4 | 1 << 3 | 5; case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return -1; /* not supported */ + return 1 << 4 | 1 << 3 | 5; } return -1; } @@ -567,20 +516,11 @@ lima_calculate_alpha_blend(enum pipe_blend_func rgb_func, enum pipe_blend_func a return lima_blend_func(rgb_func) | (lima_blend_func(alpha_func) << 3) | - (lima_blend_factor(rgb_src_factor) << 6) | - (lima_blend_factor_is_inv(rgb_src_factor) << 9) | - (lima_blend_factor_has_alpha(rgb_src_factor) << 10) | - (lima_blend_factor(rgb_dst_factor) << 11) | - (lima_blend_factor_is_inv(rgb_dst_factor) << 14) | - (lima_blend_factor_has_alpha(rgb_dst_factor) << 15) | - - (lima_blend_factor(alpha_src_factor) << 16) | - (lima_blend_factor_is_inv(alpha_src_factor) << 19) | - - (lima_blend_factor(alpha_dst_factor) << 20) | - (lima_blend_factor_is_inv(alpha_dst_factor) << 23) | + /* alpha_src and alpha_dst are 4 bit, so need to mask 5th bit */ + ((lima_blend_factor(alpha_src_factor) & 0xf) << 16) | + ((lima_blend_factor(alpha_dst_factor) & 0xf) << 20) | 0x0C000000; /* need to check if this is GLESv1 
glAlphaFunc */ } @@ -736,10 +676,10 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in render->multi_sample |= 0x68; /* Set gl_FragColor register, need to specify it 4 times */ - render->multi_sample |= (fs->state.frag_color_reg << 28) | - (fs->state.frag_color_reg << 24) | - (fs->state.frag_color_reg << 20) | - (fs->state.frag_color_reg << 16); + render->multi_sample |= (fs->state.frag_color0_reg << 28) | + (fs->state.frag_color0_reg << 24) | + (fs->state.frag_color0_reg << 20) | + (fs->state.frag_color0_reg << 16); /* alpha test */ if (ctx->zsa->base.alpha_enabled) { @@ -807,6 +747,10 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in render->aux1 |= 0x10000; } + /* Set secondary output color */ + if (fs->state.frag_color1_reg != -1) + render->aux0 |= (fs->state.frag_color1_reg << 28); + if (ctx->vs->state.num_varyings) { render->varying_types = 0x00000000; render->varyings_address = ctx->gp_output->va + diff --git a/src/gallium/drivers/lima/lima_parser.c b/src/gallium/drivers/lima/lima_parser.c index bcacd290aff..f885c771340 100644 --- a/src/gallium/drivers/lima/lima_parser.c +++ b/src/gallium/drivers/lima/lima_parser.c @@ -674,6 +674,11 @@ parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper) if ((*value & 0x00002000) == 0x00002000) /* bit 13 unknown */ fprintf(fp, ", bit 13 set"); + + fprintf(fp, " */\n"); + fprintf(fp, "\n\t\t\t\t\t\t/* %s(3):", render_state_infos[i].info); + fprintf(fp, " register for gl_SecondaryFragColor: $%d", + (*value & 0xf0000000) >> 28); fprintf(fp, " */\n"); break; case 14: /* AUX1 */ diff --git a/src/gallium/drivers/lima/lima_parser.h b/src/gallium/drivers/lima/lima_parser.h index dc99d49164b..eed7926fb5e 100644 --- a/src/gallium/drivers/lima/lima_parser.h +++ b/src/gallium/drivers/lima/lima_parser.h @@ -61,7 +61,7 @@ static const char *PIPE_BLENDFACTOR_STRING[] = { "CONST_COLOR", /* 2 */ "ZERO", /* 3 */ "UNKNOWN_4", /* 4 */ - "UNKNOWN_5", /* 5 */ + 
"SRC2_COLOR", /* 5 */ "UNKNOWN_6", /* 6 */ "SRC_ALPHA_SAT", /* 7 */ "INV_SRC_COLOR", /* 8 */ @@ -69,7 +69,7 @@ static const char *PIPE_BLENDFACTOR_STRING[] = { "INV_CONST_COLOR", /* 10 */ "ONE", /* 11 */ "UNKNOWN_12", /* 12 */ - "UNKNOWN_13", /* 13 */ + "INV_SRC2_COLOR", /* 13 */ "UNKNOWN_14", /* 14 */ "UNKNOWN_15", /* 15 */ "SRC_ALPHA", /* 16 */ @@ -77,13 +77,15 @@ static const char *PIPE_BLENDFACTOR_STRING[] = { "CONST_ALPHA", /* 18 */ "UNKNOWN_19", /* 19 */ "UNKNOWN_20", /* 20 */ - "UNKNOWN_21", /* 21 */ + "SRC2_ALPHA", /* 21 */ "UNKNOWN_22", /* 22 */ "UNKNOWN_23", /* 23 */ "INV_SRC_ALPHA", /* 24 */ "INV_DST_ALPHA", /* 25 */ "INV_CONST_ALPHA", /* 26 */ - + "UNKNOWN_27", /* 27 */ + "UNKNOWN_28", /* 28 */ + "INV_SRC2_ALPHA", /* 29 */ }; static const char *LIMA_WRAP_MODE_STRING[] = { diff --git a/src/gallium/drivers/lima/lima_screen.c b/src/gallium/drivers/lima/lima_screen.c index d86d37c1b11..f3d9a4be026 100644 --- a/src/gallium/drivers/lima/lima_screen.c +++ b/src/gallium/drivers/lima/lima_screen.c @@ -168,6 +168,9 @@ lima_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PREFER_POT_ALIGNED_VARYINGS: return 1; + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 1; + default: return u_pipe_screen_get_param_defaults(pscreen, param); }