From 93a176b6cff0ed7bfddbb9f6765ace4df89eceb2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 21 Apr 2021 15:37:26 -0400 Subject: [PATCH] panfrost: Key blend shaders to the input types On Bifrost, fragment shaders might output either FP16 or FP32. The blend shader will access the output as-is within the register, so depending on the precision of the blend shader's logic, it may need to insert a f2f16 or f2f32 conversion. This requires expanding the blend shader key. Signed-off-by: Alyssa Rosenzweig Part-of: --- .../panfrost/ci/deqp-panfrost-g52-fails.txt | 2 - src/gallium/drivers/panfrost/pan_blend_cso.c | 15 ++++- src/panfrost/lib/pan_blend.c | 59 ++++++++----------- src/panfrost/lib/pan_blend.h | 5 ++ src/panfrost/lib/pan_blitter.c | 24 ++++++-- 5 files changed, 62 insertions(+), 43 deletions(-) diff --git a/src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt b/src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt index c13e4ef2a34..8f7cfc34ad2 100644 --- a/src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt +++ b/src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt @@ -29,11 +29,9 @@ dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.13, dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.14,Fail dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.15,Fail dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.16,Fail -dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.17,Fail dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.18,Fail dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.19,Fail dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.1,Fail -dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.2,Fail dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.3,Fail 
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.4,Fail dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.6,Fail diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c index 0d5cf2d67d2..b674daa9fbe 100644 --- a/src/gallium/drivers/panfrost/pan_blend_cso.c +++ b/src/gallium/drivers/panfrost/pan_blend_cso.c @@ -168,9 +168,22 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struc PAN_BO_ACCESS_FRAGMENT); } + struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); + + /* Default for Midgard */ + nir_alu_type col0_type = nir_type_float32; + nir_alu_type col1_type = nir_type_float32; + + /* Bifrost has per-output types, respect them */ + if (pan_is_bifrost(dev)) { + col0_type = ss->info.bifrost.blend[rti].type; + col1_type = ss->info.bifrost.blend_src1_type; + } + pthread_mutex_lock(&dev->blend_shaders.lock); struct pan_blend_shader_variant *shader = - pan_blend_get_shader_locked(dev, &pan_blend, rti); + pan_blend_get_shader_locked(dev, &pan_blend, + col0_type, col1_type, rti); /* Size check */ assert((*shader_offset + shader->binary.size) < 4096); diff --git a/src/panfrost/lib/pan_blend.c b/src/panfrost/lib/pan_blend.c index 7fb3e8a8bdd..88cbc02aba8 100644 --- a/src/panfrost/lib/pan_blend.c +++ b/src/panfrost/lib/pan_blend.c @@ -30,6 +30,7 @@ #include "util/format/u_format.h" #include "compiler/nir/nir.h" #include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_conversion_builder.h" /* Fixed function blending */ @@ -448,15 +449,11 @@ get_equation_str(const struct pan_blend_rt_state *rt_state, } } -static nir_ssa_def * -nir_iclamp(nir_builder *b, nir_ssa_def *v, int32_t lo, int32_t hi) -{ - return nir_imin(b, nir_imax(b, v, nir_imm_int(b, lo)), nir_imm_int(b, hi)); -} - nir_shader * pan_blend_create_shader(const struct panfrost_device *dev, const struct pan_blend_state *state, + nir_alu_type 
src0_type, + nir_alu_type src1_type, unsigned rt) { const struct pan_blend_rt_state *rt_state = &state->rts[rt]; @@ -512,14 +509,22 @@ pan_blend_create_shader(const struct panfrost_device *dev, options.alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor; } + nir_alu_type src_types[] = { src0_type ?: nir_type_float32, src1_type ?: nir_type_float32 }; + + /* HACK: workaround buggy TGSI shaders (u_blitter) */ + for (unsigned i = 0; i < ARRAY_SIZE(src_types); ++i) { + src_types[i] = nir_alu_type_get_base_type(nir_type) | + nir_alu_type_get_type_size(src_types[i]); + } + nir_variable *c_src = nir_variable_create(b.shader, nir_var_shader_in, - glsl_vector_type(GLSL_TYPE_FLOAT, 4), + glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[0]), 4), "gl_Color"); c_src->data.location = VARYING_SLOT_COL0; nir_variable *c_src1 = nir_variable_create(b.shader, nir_var_shader_in, - glsl_vector_type(GLSL_TYPE_FLOAT, 4), + glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[1]), 4), "gl_Color1"); c_src1->data.location = VARYING_SLOT_VAR0; c_src1->data.driver_location = 1; @@ -531,33 +536,13 @@ pan_blend_create_shader(const struct panfrost_device *dev, nir_ssa_def *s_src[] = {nir_load_var(&b, c_src), nir_load_var(&b, c_src1)}; + /* Saturate integer conversions */ for (int i = 0; i < ARRAY_SIZE(s_src); ++i) { - switch (nir_type) { - case nir_type_float16: - case nir_type_float32: - s_src[i] = nir_type_convert(&b, s_src[i], nir_type_float, nir_type); - break; - case nir_type_int32: - s_src[i] = nir_i2i32(&b, s_src[i]); - break; - case nir_type_uint32: - s_src[i] = nir_u2u32(&b, s_src[i]); - break; - case nir_type_int16: - s_src[i] = nir_i2i16(&b, nir_iclamp(&b, s_src[i], -32768, 32767)); - break; - case nir_type_uint16: - s_src[i] = nir_u2u16(&b, nir_umin(&b, s_src[i], nir_imm_int(&b, 65535))); - break; - case nir_type_int8: - s_src[i] = nir_i2i8(&b, nir_iclamp(&b, s_src[i], -128, 127)); - break; - case nir_type_uint8: - s_src[i] = nir_u2u8(&b, 
nir_umin(&b, s_src[i], nir_imm_int(&b, 255))); - break; - default: - unreachable("Unhandled source type to blend shader"); - } + bool is_float = nir_alu_type_get_base_type(nir_type) == nir_type_float; + s_src[i] = nir_convert_with_rounding(&b, s_src[i], + src_types[i], nir_type, + nir_rounding_mode_undef, + !is_float); } /* Build a trivial blend shader */ @@ -629,10 +614,14 @@ pan_blend_get_bifrost_desc(const struct panfrost_device *dev, struct pan_blend_shader_variant * pan_blend_get_shader_locked(const struct panfrost_device *dev, const struct pan_blend_state *state, + nir_alu_type src0_type, + nir_alu_type src1_type, unsigned rt) { struct pan_blend_shader_key key = { .format = state->rts[rt].format, + .src0_type = src0_type, + .src1_type = src1_type, .rt = rt, .has_constants = pan_blend_constant_mask(state, rt) != 0, .logicop_enable = state->logicop_enable, @@ -674,7 +663,7 @@ pan_blend_get_shader_locked(const struct panfrost_device *dev, util_dynarray_clear(&variant->binary); } - nir_shader *nir = pan_blend_create_shader(dev, state, rt); + nir_shader *nir = pan_blend_create_shader(dev, state, src0_type, src1_type, rt); /* Compile the NIR shader */ struct panfrost_compile_inputs inputs = { diff --git a/src/panfrost/lib/pan_blend.h b/src/panfrost/lib/pan_blend.h index 6cd4dc6ca9f..0f00cbad74a 100644 --- a/src/panfrost/lib/pan_blend.h +++ b/src/panfrost/lib/pan_blend.h @@ -71,6 +71,7 @@ struct pan_blend_state { struct pan_blend_shader_key { enum pipe_format format; + nir_alu_type src0_type, src1_type; unsigned rt : 3; unsigned has_constants : 1; unsigned logicop_enable : 1; @@ -125,6 +126,8 @@ pan_blend_to_fixed_function_equation(const struct panfrost_device *dev, nir_shader * pan_blend_create_shader(const struct panfrost_device *dev, const struct pan_blend_state *state, + nir_alu_type src0_type, + nir_alu_type src1_type, unsigned rt); uint64_t @@ -138,6 +141,8 @@ pan_blend_get_bifrost_desc(const struct panfrost_device *dev, struct pan_blend_shader_variant * 
pan_blend_get_shader_locked(const struct panfrost_device *dev, const struct pan_blend_state *state, + nir_alu_type src0_type, + nir_alu_type src1_type, unsigned rt); void diff --git a/src/panfrost/lib/pan_blitter.c b/src/panfrost/lib/pan_blitter.c index 7cfe2d9614d..0e1e7de671d 100644 --- a/src/panfrost/lib/pan_blitter.c +++ b/src/panfrost/lib/pan_blitter.c @@ -76,10 +76,12 @@ struct pan_blit_shader_data { struct pan_blit_shader_key key; mali_ptr address; unsigned blend_ret_offsets[8]; + nir_alu_type blend_types[8]; }; struct pan_blit_blend_shader_key { enum pipe_format format; + nir_alu_type type; unsigned rt : 3; unsigned nr_samples : 5; }; @@ -361,6 +363,7 @@ static void pan_blitter_get_blend_shaders(struct panfrost_device *dev, unsigned rt_count, const struct pan_image_view **rts, + const struct pan_blit_shader_data *blit_shader, mali_ptr *blend_shaders) { if (!rt_count) @@ -378,6 +381,7 @@ pan_blitter_get_blend_shaders(struct panfrost_device *dev, .format = rts[i]->format, .rt = i, .nr_samples = rts[i]->image->layout.nr_samples, + .type = blit_shader->blend_types[i], }; pthread_mutex_lock(&dev->blitter.shaders.lock); @@ -413,7 +417,10 @@ pan_blitter_get_blend_shaders(struct panfrost_device *dev, pthread_mutex_lock(&dev->blend_shaders.lock); struct pan_blend_shader_variant *b = - pan_blend_get_shader_locked(dev, &blend_state, i); + pan_blend_get_shader_locked(dev, &blend_state, + blit_shader->blend_types[i], + nir_type_float32, /* unused */ + i); assert(b->work_reg_count <= 4); struct panfrost_ptr bin = @@ -561,8 +568,12 @@ pan_blitter_get_blit_shader(struct panfrost_device *dev, if (!pan_is_bifrost(dev)) shader->address |= info.midgard.first_tag; - for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) - shader->blend_ret_offsets[i] = info.bifrost.blend[i].return_offset; + if (pan_is_bifrost(dev)) { + for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) { + shader->blend_ret_offsets[i] = info.bifrost.blend[i].return_offset; + 
shader->blend_types[i] = info.bifrost.blend[i].type; + } + } _mesa_hash_table_insert(dev->blitter.shaders.blit, &shader->key, shader); @@ -632,9 +643,12 @@ pan_blitter_get_rsd(struct panfrost_device *dev, mali_ptr blend_shaders[8] = { 0 }; - pan_blitter_get_blend_shaders(dev, rt_count, rts, blend_shaders); + const struct pan_blit_shader_data *blit_shader = + pan_blitter_get_blit_shader(dev, &blit_key); - pan_blitter_emit_rsd(dev, pan_blitter_get_blit_shader(dev, &blit_key), + pan_blitter_get_blend_shaders(dev, rt_count, rts, blit_shader, blend_shaders); + + pan_blitter_emit_rsd(dev, blit_shader, MAX2(rt_count, 1), rts, blend_shaders, z, s, rsd_ptr.cpu); rsd->address = rsd_ptr.gpu;