panfrost: Key blend shaders to the input types
On Bifrost, fragment shaders might output either FP16 or FP32. The blend shader will access the output as-is within the register, so depending on the precision of the blend shader's logic, it may need to insert an f2f16 or f2f32 conversion. This requires expanding the blend shader key.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10393>
This commit is contained in:

committed by
Marge Bot

parent
9df1d1306e
commit
93a176b6cf
@@ -29,11 +29,9 @@ dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.13,
|
||||
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.14,Fail
|
||||
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.15,Fail
|
||||
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.16,Fail
|
||||
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.17,Fail
|
||||
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.18,Fail
|
||||
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.19,Fail
|
||||
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.1,Fail
|
||||
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.2,Fail
|
||||
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.3,Fail
|
||||
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.4,Fail
|
||||
dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.6,Fail
|
||||
|
@@ -168,9 +168,22 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struc
|
||||
PAN_BO_ACCESS_FRAGMENT);
|
||||
}
|
||||
|
||||
struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
|
||||
|
||||
/* Default for Midgard */
|
||||
nir_alu_type col0_type = nir_type_float32;
|
||||
nir_alu_type col1_type = nir_type_float32;
|
||||
|
||||
/* Bifrost has per-output types, respect them */
|
||||
if (pan_is_bifrost(dev)) {
|
||||
col0_type = ss->info.bifrost.blend[rti].type;
|
||||
col1_type = ss->info.bifrost.blend_src1_type;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&dev->blend_shaders.lock);
|
||||
struct pan_blend_shader_variant *shader =
|
||||
pan_blend_get_shader_locked(dev, &pan_blend, rti);
|
||||
pan_blend_get_shader_locked(dev, &pan_blend,
|
||||
col0_type, col1_type, rti);
|
||||
|
||||
/* Size check */
|
||||
assert((*shader_offset + shader->binary.size) < 4096);
|
||||
|
@@ -30,6 +30,7 @@
|
||||
#include "util/format/u_format.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "compiler/nir/nir_conversion_builder.h"
|
||||
|
||||
/* Fixed function blending */
|
||||
|
||||
@@ -448,15 +449,11 @@ get_equation_str(const struct pan_blend_rt_state *rt_state,
|
||||
}
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
nir_iclamp(nir_builder *b, nir_ssa_def *v, int32_t lo, int32_t hi)
|
||||
{
|
||||
return nir_imin(b, nir_imax(b, v, nir_imm_int(b, lo)), nir_imm_int(b, hi));
|
||||
}
|
||||
|
||||
nir_shader *
|
||||
pan_blend_create_shader(const struct panfrost_device *dev,
|
||||
const struct pan_blend_state *state,
|
||||
nir_alu_type src0_type,
|
||||
nir_alu_type src1_type,
|
||||
unsigned rt)
|
||||
{
|
||||
const struct pan_blend_rt_state *rt_state = &state->rts[rt];
|
||||
@@ -512,14 +509,22 @@ pan_blend_create_shader(const struct panfrost_device *dev,
|
||||
options.alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor;
|
||||
}
|
||||
|
||||
nir_alu_type src_types[] = { src0_type ?: nir_type_float32, src1_type ?: nir_type_float32 };
|
||||
|
||||
/* HACK: workaround buggy TGSI shaders (u_blitter) */
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(src_types); ++i) {
|
||||
src_types[i] = nir_alu_type_get_base_type(nir_type) |
|
||||
nir_alu_type_get_type_size(src_types[i]);
|
||||
}
|
||||
|
||||
nir_variable *c_src =
|
||||
nir_variable_create(b.shader, nir_var_shader_in,
|
||||
glsl_vector_type(GLSL_TYPE_FLOAT, 4),
|
||||
glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[0]), 4),
|
||||
"gl_Color");
|
||||
c_src->data.location = VARYING_SLOT_COL0;
|
||||
nir_variable *c_src1 =
|
||||
nir_variable_create(b.shader, nir_var_shader_in,
|
||||
glsl_vector_type(GLSL_TYPE_FLOAT, 4),
|
||||
glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[1]), 4),
|
||||
"gl_Color1");
|
||||
c_src1->data.location = VARYING_SLOT_VAR0;
|
||||
c_src1->data.driver_location = 1;
|
||||
@@ -531,33 +536,13 @@ pan_blend_create_shader(const struct panfrost_device *dev,
|
||||
|
||||
nir_ssa_def *s_src[] = {nir_load_var(&b, c_src), nir_load_var(&b, c_src1)};
|
||||
|
||||
/* Saturate integer conversions */
|
||||
for (int i = 0; i < ARRAY_SIZE(s_src); ++i) {
|
||||
switch (nir_type) {
|
||||
case nir_type_float16:
|
||||
case nir_type_float32:
|
||||
s_src[i] = nir_type_convert(&b, s_src[i], nir_type_float, nir_type);
|
||||
break;
|
||||
case nir_type_int32:
|
||||
s_src[i] = nir_i2i32(&b, s_src[i]);
|
||||
break;
|
||||
case nir_type_uint32:
|
||||
s_src[i] = nir_u2u32(&b, s_src[i]);
|
||||
break;
|
||||
case nir_type_int16:
|
||||
s_src[i] = nir_i2i16(&b, nir_iclamp(&b, s_src[i], -32768, 32767));
|
||||
break;
|
||||
case nir_type_uint16:
|
||||
s_src[i] = nir_u2u16(&b, nir_umin(&b, s_src[i], nir_imm_int(&b, 65535)));
|
||||
break;
|
||||
case nir_type_int8:
|
||||
s_src[i] = nir_i2i8(&b, nir_iclamp(&b, s_src[i], -128, 127));
|
||||
break;
|
||||
case nir_type_uint8:
|
||||
s_src[i] = nir_u2u8(&b, nir_umin(&b, s_src[i], nir_imm_int(&b, 255)));
|
||||
break;
|
||||
default:
|
||||
unreachable("Unhandled source type to blend shader");
|
||||
}
|
||||
bool is_float = nir_alu_type_get_base_type(nir_type);
|
||||
s_src[i] = nir_convert_with_rounding(&b, s_src[i],
|
||||
src_types[i], nir_type,
|
||||
nir_rounding_mode_undef,
|
||||
!is_float);
|
||||
}
|
||||
|
||||
/* Build a trivial blend shader */
|
||||
@@ -629,10 +614,14 @@ pan_blend_get_bifrost_desc(const struct panfrost_device *dev,
|
||||
struct pan_blend_shader_variant *
|
||||
pan_blend_get_shader_locked(const struct panfrost_device *dev,
|
||||
const struct pan_blend_state *state,
|
||||
nir_alu_type src0_type,
|
||||
nir_alu_type src1_type,
|
||||
unsigned rt)
|
||||
{
|
||||
struct pan_blend_shader_key key = {
|
||||
.format = state->rts[rt].format,
|
||||
.src0_type = src0_type,
|
||||
.src1_type = src1_type,
|
||||
.rt = rt,
|
||||
.has_constants = pan_blend_constant_mask(state, rt) != 0,
|
||||
.logicop_enable = state->logicop_enable,
|
||||
@@ -674,7 +663,7 @@ pan_blend_get_shader_locked(const struct panfrost_device *dev,
|
||||
util_dynarray_clear(&variant->binary);
|
||||
}
|
||||
|
||||
nir_shader *nir = pan_blend_create_shader(dev, state, rt);
|
||||
nir_shader *nir = pan_blend_create_shader(dev, state, src0_type, src1_type, rt);
|
||||
|
||||
/* Compile the NIR shader */
|
||||
struct panfrost_compile_inputs inputs = {
|
||||
|
@@ -71,6 +71,7 @@ struct pan_blend_state {
|
||||
|
||||
struct pan_blend_shader_key {
|
||||
enum pipe_format format;
|
||||
nir_alu_type src0_type, src1_type;
|
||||
unsigned rt : 3;
|
||||
unsigned has_constants : 1;
|
||||
unsigned logicop_enable : 1;
|
||||
@@ -125,6 +126,8 @@ pan_blend_to_fixed_function_equation(const struct panfrost_device *dev,
|
||||
nir_shader *
|
||||
pan_blend_create_shader(const struct panfrost_device *dev,
|
||||
const struct pan_blend_state *state,
|
||||
nir_alu_type src0_type,
|
||||
nir_alu_type src1_type,
|
||||
unsigned rt);
|
||||
|
||||
uint64_t
|
||||
@@ -138,6 +141,8 @@ pan_blend_get_bifrost_desc(const struct panfrost_device *dev,
|
||||
struct pan_blend_shader_variant *
|
||||
pan_blend_get_shader_locked(const struct panfrost_device *dev,
|
||||
const struct pan_blend_state *state,
|
||||
nir_alu_type src0_type,
|
||||
nir_alu_type src1_type,
|
||||
unsigned rt);
|
||||
|
||||
void
|
||||
|
@@ -76,10 +76,12 @@ struct pan_blit_shader_data {
|
||||
struct pan_blit_shader_key key;
|
||||
mali_ptr address;
|
||||
unsigned blend_ret_offsets[8];
|
||||
nir_alu_type blend_types[8];
|
||||
};
|
||||
|
||||
struct pan_blit_blend_shader_key {
|
||||
enum pipe_format format;
|
||||
nir_alu_type type;
|
||||
unsigned rt : 3;
|
||||
unsigned nr_samples : 5;
|
||||
};
|
||||
@@ -361,6 +363,7 @@ static void
|
||||
pan_blitter_get_blend_shaders(struct panfrost_device *dev,
|
||||
unsigned rt_count,
|
||||
const struct pan_image_view **rts,
|
||||
const struct pan_blit_shader_data *blit_shader,
|
||||
mali_ptr *blend_shaders)
|
||||
{
|
||||
if (!rt_count)
|
||||
@@ -378,6 +381,7 @@ pan_blitter_get_blend_shaders(struct panfrost_device *dev,
|
||||
.format = rts[i]->format,
|
||||
.rt = i,
|
||||
.nr_samples = rts[i]->image->layout.nr_samples,
|
||||
.type = blit_shader->blend_types[i],
|
||||
};
|
||||
|
||||
pthread_mutex_lock(&dev->blitter.shaders.lock);
|
||||
@@ -413,7 +417,10 @@ pan_blitter_get_blend_shaders(struct panfrost_device *dev,
|
||||
|
||||
pthread_mutex_lock(&dev->blend_shaders.lock);
|
||||
struct pan_blend_shader_variant *b =
|
||||
pan_blend_get_shader_locked(dev, &blend_state, i);
|
||||
pan_blend_get_shader_locked(dev, &blend_state,
|
||||
blit_shader->blend_types[i],
|
||||
nir_type_float32, /* unused */
|
||||
i);
|
||||
|
||||
assert(b->work_reg_count <= 4);
|
||||
struct panfrost_ptr bin =
|
||||
@@ -561,8 +568,12 @@ pan_blitter_get_blit_shader(struct panfrost_device *dev,
|
||||
if (!pan_is_bifrost(dev))
|
||||
shader->address |= info.midgard.first_tag;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++)
|
||||
shader->blend_ret_offsets[i] = info.bifrost.blend[i].return_offset;
|
||||
if (pan_is_bifrost(dev)) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) {
|
||||
shader->blend_ret_offsets[i] = info.bifrost.blend[i].return_offset;
|
||||
shader->blend_types[i] = info.bifrost.blend[i].type;
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_hash_table_insert(dev->blitter.shaders.blit, &shader->key, shader);
|
||||
|
||||
@@ -632,9 +643,12 @@ pan_blitter_get_rsd(struct panfrost_device *dev,
|
||||
|
||||
mali_ptr blend_shaders[8] = { 0 };
|
||||
|
||||
pan_blitter_get_blend_shaders(dev, rt_count, rts, blend_shaders);
|
||||
const struct pan_blit_shader_data *blit_shader =
|
||||
pan_blitter_get_blit_shader(dev, &blit_key);
|
||||
|
||||
pan_blitter_emit_rsd(dev, pan_blitter_get_blit_shader(dev, &blit_key),
|
||||
pan_blitter_get_blend_shaders(dev, rt_count, rts, blit_shader, blend_shaders);
|
||||
|
||||
pan_blitter_emit_rsd(dev, blit_shader,
|
||||
MAX2(rt_count, 1), rts, blend_shaders,
|
||||
z, s, rsd_ptr.cpu);
|
||||
rsd->address = rsd_ptr.gpu;
|
||||
|
Reference in New Issue
Block a user