panfrost: Key blend shaders to the input types

On Bifrost, fragment shaders might output either FP16 or FP32. The blend
shader reads that output as-is from the register, so depending on
the precision of the blend shader's own logic, it may need to insert an
f2f16 or f2f32 conversion. This requires expanding the blend shader key.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10393>
This commit is contained in:
Alyssa Rosenzweig
2021-04-21 15:37:26 -04:00
committed by Marge Bot
parent 9df1d1306e
commit 93a176b6cf
5 changed files with 62 additions and 43 deletions

View File

@@ -29,11 +29,9 @@ dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.13,
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.14,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.15,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.16,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.17,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.18,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.19,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.1,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.2,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.3,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.4,Fail
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.6,Fail

View File

@@ -168,9 +168,22 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struc
PAN_BO_ACCESS_FRAGMENT);
}
struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
/* Default for Midgard */
nir_alu_type col0_type = nir_type_float32;
nir_alu_type col1_type = nir_type_float32;
/* Bifrost has per-output types, respect them */
if (pan_is_bifrost(dev)) {
col0_type = ss->info.bifrost.blend[rti].type;
col1_type = ss->info.bifrost.blend_src1_type;
}
pthread_mutex_lock(&dev->blend_shaders.lock);
struct pan_blend_shader_variant *shader =
pan_blend_get_shader_locked(dev, &pan_blend, rti);
pan_blend_get_shader_locked(dev, &pan_blend,
col0_type, col1_type, rti);
/* Size check */
assert((*shader_offset + shader->binary.size) < 4096);

View File

@@ -30,6 +30,7 @@
#include "util/format/u_format.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_conversion_builder.h"
/* Fixed function blending */
@@ -448,15 +449,11 @@ get_equation_str(const struct pan_blend_rt_state *rt_state,
}
}
/* Clamp the signed integer value v to the inclusive range [lo, hi]. */
static nir_ssa_def *
nir_iclamp(nir_builder *b, nir_ssa_def *v, int32_t lo, int32_t hi)
{
        /* Raise to at least lo first, then cap the result at hi */
        nir_ssa_def *lower_bounded = nir_imax(b, v, nir_imm_int(b, lo));
        nir_ssa_def *upper_limit = nir_imm_int(b, hi);

        return nir_imin(b, lower_bounded, upper_limit);
}
nir_shader *
pan_blend_create_shader(const struct panfrost_device *dev,
const struct pan_blend_state *state,
nir_alu_type src0_type,
nir_alu_type src1_type,
unsigned rt)
{
const struct pan_blend_rt_state *rt_state = &state->rts[rt];
@@ -512,14 +509,22 @@ pan_blend_create_shader(const struct panfrost_device *dev,
options.alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor;
}
nir_alu_type src_types[] = { src0_type ?: nir_type_float32, src1_type ?: nir_type_float32 };
/* HACK: workaround buggy TGSI shaders (u_blitter) */
for (unsigned i = 0; i < ARRAY_SIZE(src_types); ++i) {
src_types[i] = nir_alu_type_get_base_type(nir_type) |
nir_alu_type_get_type_size(src_types[i]);
}
nir_variable *c_src =
nir_variable_create(b.shader, nir_var_shader_in,
glsl_vector_type(GLSL_TYPE_FLOAT, 4),
glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[0]), 4),
"gl_Color");
c_src->data.location = VARYING_SLOT_COL0;
nir_variable *c_src1 =
nir_variable_create(b.shader, nir_var_shader_in,
glsl_vector_type(GLSL_TYPE_FLOAT, 4),
glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[1]), 4),
"gl_Color1");
c_src1->data.location = VARYING_SLOT_VAR0;
c_src1->data.driver_location = 1;
@@ -531,33 +536,13 @@ pan_blend_create_shader(const struct panfrost_device *dev,
nir_ssa_def *s_src[] = {nir_load_var(&b, c_src), nir_load_var(&b, c_src1)};
/* Saturate integer conversions */
for (int i = 0; i < ARRAY_SIZE(s_src); ++i) {
switch (nir_type) {
case nir_type_float16:
case nir_type_float32:
s_src[i] = nir_type_convert(&b, s_src[i], nir_type_float, nir_type);
break;
case nir_type_int32:
s_src[i] = nir_i2i32(&b, s_src[i]);
break;
case nir_type_uint32:
s_src[i] = nir_u2u32(&b, s_src[i]);
break;
case nir_type_int16:
s_src[i] = nir_i2i16(&b, nir_iclamp(&b, s_src[i], -32768, 32767));
break;
case nir_type_uint16:
s_src[i] = nir_u2u16(&b, nir_umin(&b, s_src[i], nir_imm_int(&b, 65535)));
break;
case nir_type_int8:
s_src[i] = nir_i2i8(&b, nir_iclamp(&b, s_src[i], -128, 127));
break;
case nir_type_uint8:
s_src[i] = nir_u2u8(&b, nir_umin(&b, s_src[i], nir_imm_int(&b, 255)));
break;
default:
unreachable("Unhandled source type to blend shader");
}
bool is_float = nir_alu_type_get_base_type(nir_type);
s_src[i] = nir_convert_with_rounding(&b, s_src[i],
src_types[i], nir_type,
nir_rounding_mode_undef,
!is_float);
}
/* Build a trivial blend shader */
@@ -629,10 +614,14 @@ pan_blend_get_bifrost_desc(const struct panfrost_device *dev,
struct pan_blend_shader_variant *
pan_blend_get_shader_locked(const struct panfrost_device *dev,
const struct pan_blend_state *state,
nir_alu_type src0_type,
nir_alu_type src1_type,
unsigned rt)
{
struct pan_blend_shader_key key = {
.format = state->rts[rt].format,
.src0_type = src0_type,
.src1_type = src1_type,
.rt = rt,
.has_constants = pan_blend_constant_mask(state, rt) != 0,
.logicop_enable = state->logicop_enable,
@@ -674,7 +663,7 @@ pan_blend_get_shader_locked(const struct panfrost_device *dev,
util_dynarray_clear(&variant->binary);
}
nir_shader *nir = pan_blend_create_shader(dev, state, rt);
nir_shader *nir = pan_blend_create_shader(dev, state, src0_type, src1_type, rt);
/* Compile the NIR shader */
struct panfrost_compile_inputs inputs = {

View File

@@ -71,6 +71,7 @@ struct pan_blend_state {
struct pan_blend_shader_key {
enum pipe_format format;
nir_alu_type src0_type, src1_type;
unsigned rt : 3;
unsigned has_constants : 1;
unsigned logicop_enable : 1;
@@ -125,6 +126,8 @@ pan_blend_to_fixed_function_equation(const struct panfrost_device *dev,
/* Create the NIR blend shader for render target `rt` of `state`.
 *
 * src0_type / src1_type give the nir_alu_type of the two colour inputs
 * the blend shader reads. The implementation substitutes
 * nir_type_float32 when either is passed as 0. */
nir_shader *
pan_blend_create_shader(const struct panfrost_device *dev,
const struct pan_blend_state *state,
nir_alu_type src0_type,
nir_alu_type src1_type,
unsigned rt);
uint64_t
@@ -138,6 +141,8 @@ pan_blend_get_bifrost_desc(const struct panfrost_device *dev,
/* Get the blend shader variant for render target `rt` of `state`.
 *
 * src0_type and src1_type are stored in the blend shader key and are
 * forwarded to pan_blend_create_shader() when a shader has to be built.
 *
 * NOTE(review): the visible callers take dev->blend_shaders.lock before
 * calling; presumably that is what the _locked suffix requires. */
struct pan_blend_shader_variant *
pan_blend_get_shader_locked(const struct panfrost_device *dev,
const struct pan_blend_state *state,
nir_alu_type src0_type,
nir_alu_type src1_type,
unsigned rt);
void

View File

@@ -76,10 +76,12 @@ struct pan_blit_shader_data {
struct pan_blit_shader_key key;
mali_ptr address;
unsigned blend_ret_offsets[8];
nir_alu_type blend_types[8];
};
/* Key identifying a blend shader used by the blitter.
 * NOTE(review): presumably this is the key filled in by
 * pan_blitter_get_blend_shaders(); confirm against the full file. */
struct pan_blit_blend_shader_key {
/* Format of the render target */
enum pipe_format format;
/* Type the blit shader outputs for this render target, so blend shaders
 * compiled for different input types are kept apart */
nir_alu_type type;
/* Render target index (3-bit field) */
unsigned rt : 3;
/* Sample count of the render target image (5-bit field) */
unsigned nr_samples : 5;
};
@@ -361,6 +363,7 @@ static void
pan_blitter_get_blend_shaders(struct panfrost_device *dev,
unsigned rt_count,
const struct pan_image_view **rts,
const struct pan_blit_shader_data *blit_shader,
mali_ptr *blend_shaders)
{
if (!rt_count)
@@ -378,6 +381,7 @@ pan_blitter_get_blend_shaders(struct panfrost_device *dev,
.format = rts[i]->format,
.rt = i,
.nr_samples = rts[i]->image->layout.nr_samples,
.type = blit_shader->blend_types[i],
};
pthread_mutex_lock(&dev->blitter.shaders.lock);
@@ -413,7 +417,10 @@ pan_blitter_get_blend_shaders(struct panfrost_device *dev,
pthread_mutex_lock(&dev->blend_shaders.lock);
struct pan_blend_shader_variant *b =
pan_blend_get_shader_locked(dev, &blend_state, i);
pan_blend_get_shader_locked(dev, &blend_state,
blit_shader->blend_types[i],
nir_type_float32, /* unused */
i);
assert(b->work_reg_count <= 4);
struct panfrost_ptr bin =
@@ -561,8 +568,12 @@ pan_blitter_get_blit_shader(struct panfrost_device *dev,
if (!pan_is_bifrost(dev))
shader->address |= info.midgard.first_tag;
for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++)
shader->blend_ret_offsets[i] = info.bifrost.blend[i].return_offset;
if (pan_is_bifrost(dev)) {
for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) {
shader->blend_ret_offsets[i] = info.bifrost.blend[i].return_offset;
shader->blend_types[i] = info.bifrost.blend[i].type;
}
}
_mesa_hash_table_insert(dev->blitter.shaders.blit, &shader->key, shader);
@@ -632,9 +643,12 @@ pan_blitter_get_rsd(struct panfrost_device *dev,
mali_ptr blend_shaders[8] = { 0 };
pan_blitter_get_blend_shaders(dev, rt_count, rts, blend_shaders);
const struct pan_blit_shader_data *blit_shader =
pan_blitter_get_blit_shader(dev, &blit_key);
pan_blitter_emit_rsd(dev, pan_blitter_get_blit_shader(dev, &blit_key),
pan_blitter_get_blend_shaders(dev, rt_count, rts, blit_shader, blend_shaders);
pan_blitter_emit_rsd(dev, blit_shader,
MAX2(rt_count, 1), rts, blend_shaders,
z, s, rsd_ptr.cpu);
rsd->address = rsd_ptr.gpu;