glsl: enable the use of the nir based varying linker

Here as well as calling the pass we need to switch the order of
some of the information gathering and optimisation calls. We also
need to create a custom callback for the dead variables removal
pass to clean up dead builtin varying in SSO programs without
causing piglit regressions.

shader-db results IRIS (BDW):

total instructions in shared programs: 17487900 -> 17477072 (-0.06%)
instructions in affected programs: 128682 -> 117854 (-8.41%)
helped: 587
HURT: 82
helped stats (abs) min: 1 max: 145 x̄: 18.82 x̃: 20
helped stats (rel) min: 0.21% max: 77.78% x̄: 17.41% x̃: 8.85%
HURT stats (abs)   min: 1 max: 6 x̄: 2.68 x̃: 2
HURT stats (rel)   min: 0.25% max: 9.76% x̄: 2.94% x̃: 2.16%
95% mean confidence interval for instructions value: -17.71 -14.66
95% mean confidence interval for instructions %-change: -16.40% -13.42%
Instructions are helped.

total cycles in shared programs: 857442520 -> 857170199 (-0.03%)
cycles in affected programs: 112252720 -> 111980399 (-0.24%)
helped: 13733
HURT: 13349
helped stats (abs) min: 1 max: 7293 x̄: 81.44 x̃: 10
helped stats (rel) min: <.01% max: 90.32% x̄: 3.30% x̃: 0.62%
HURT stats (abs)   min: 1 max: 7424 x̄: 63.38 x̃: 8
HURT stats (rel)   min: <.01% max: 192.23% x̄: 3.28% x̃: 0.54%
95% mean confidence interval for cycles value: -14.01 -6.10
95% mean confidence interval for cycles %-change: -0.17% 0.06%
Inconclusive result (%-change mean confidence interval includes 0).

total sends in shared programs: 971443 -> 970010 (-0.15%)
sends in affected programs: 4596 -> 3163 (-31.18%)
helped: 446
HURT: 39
helped stats (abs) min: 1 max: 6 x̄: 3.40 x̃: 4
helped stats (rel) min: 3.03% max: 85.71% x̄: 46.48% x̃: 50.00%
HURT stats (abs)   min: 1 max: 3 x̄: 2.15 x̃: 2
HURT stats (rel)   min: 6.67% max: 25.00% x̄: 15.16% x̃: 10.53%
95% mean confidence interval for sends value: -3.13 -2.78
95% mean confidence interval for sends %-change: -44.16% -38.88%
Sends are helped.

LOST:   235
GAINED: 262

Shader-db results radeonsi (RX580):

169505 shaders in 102144 tests
Totals:
SGPRS: 7698832 -> 7696552 (-0.03 %)
VGPRS: 5547296 -> 5545280 (-0.04 %)
Spilled SGPRs: 14795 -> 14773 (-0.15 %)
Spilled VGPRs: 3782 -> 3782 (0.00 %)
Private memory VGPRs: 1152 -> 1152 (0.00 %)
Scratch size: 3872 -> 3872 (0.00 %) dwords per thread
Code Size: 162946528 -> 162895264 (-0.03 %) bytes
Max Waves: 2449334 -> 2449736 (0.02 %)

Totals from affected shaders:
SGPRS: 215024 -> 212744 (-1.06 %)
VGPRS: 151976 -> 149960 (-1.33 %)
Spilled SGPRs: 162 -> 140 (-13.58 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 5249916 -> 5198652 (-0.98 %) bytes
Max Waves: 54588 -> 54990 (0.74 %)

Panfrost trace checksum is updated as per discussion in:
https://gitlab.freedesktop.org/mesa/mesa/-/issues/6343

Some virpipe tess shader piglit tests are added as failures to CI
these failures are not a regression but an uncovered existing bug
exposed due to the linker no longer sorting internally facing
shader interfaces in alphabetical order. See details in:
https://gitlab.freedesktop.org/mesa/mesa/-/issues/6481

Acked-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15731>
This commit is contained in:
Timothy Arceri
2021-11-23 12:44:07 +11:00
committed by Marge Bot
parent fa9cee4247
commit 7647023f3b
8 changed files with 63 additions and 35 deletions

View File

@@ -781,8 +781,15 @@ check_image_resources(const struct gl_constants *consts,
bool
gl_nir_link_glsl(const struct gl_constants *consts,
const struct gl_extensions *exts,
gl_api api,
struct gl_shader_program *prog)
{
if (prog->NumShaders == 0)
return true;
if (!gl_nir_link_varyings(consts, exts, api, prog))
return false;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_linked_shader *shader = prog->_LinkedShaders[i];
if (shader) {

View File

@@ -58,6 +58,7 @@ bool gl_nir_link_spirv(const struct gl_constants *consts,
bool gl_nir_link_glsl(const struct gl_constants *consts,
const struct gl_extensions *exts,
gl_api api,
struct gl_shader_program *prog);
bool gl_nir_link_uniforms(const struct gl_constants *consts,

View File

@@ -254,15 +254,6 @@ glsl_to_nir(const struct gl_constants *consts,
if (shader_prog->Label)
shader->info.label = ralloc_strdup(shader, shader_prog->Label);
/* Check for transform feedback varyings specified via the API */
shader->info.has_transform_feedback_varyings =
shader_prog->TransformFeedback.NumVarying > 0;
/* Check for transform feedback varyings specified in the Shader */
if (shader_prog->last_vert_prog)
shader->info.has_transform_feedback_varyings |=
shader_prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
shader->info.fs.pixel_center_integer = sh->Program->info.fs.pixel_center_integer;
shader->info.fs.origin_upper_left = sh->Program->info.fs.origin_upper_left;

View File

@@ -4100,10 +4100,6 @@ link_varyings_and_uniforms(unsigned first, unsigned last,
break;
}
if (!link_varyings(prog, first, last, consts, exts,
api, mem_ctx))
return false;
if (!prog->data->LinkStatus)
return false;

View File

@@ -536,11 +536,20 @@ spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-07,Fail
spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-08,Fail
spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-cumulative,Fail
spec@arb_tessellation_shader@execution@barrier-patch,Fail
spec@arb_tessellation_shader@execution@double-array-vs-tcs-tes,Fail
spec@arb_tessellation_shader@execution@double-vs-tcs-tes,Fail
spec@arb_tessellation_shader@execution@dvec2-vs-tcs-tes,Fail
spec@arb_tessellation_shader@execution@dvec3-vs-tcs-tes,Fail
spec@arb_tessellation_shader@execution@gs-primitiveid-instanced,Fail
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-float-index-rd-after-barrier,Fail
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-float-index-wr-before-barrier,Fail
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec2-index-rd-after-barrier,Fail
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec2-index-wr-before-barrier,Fail
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec3-index-rd-after-barrier,Fail
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec3-index-wr-before-barrier,Fail
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec4-index-rd-after-barrier,Fail
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec4-index-wr-before-barrier,Fail
# " intrinsic copy_deref (ssa_2, ssa_3) (dst_access=0, src_access=0)
# error: glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type) (../src/compiler/nir/nir_validate.c:643)"

View File

@@ -139,8 +139,6 @@ link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
validate_ir_tree(ir);
}
build_program_resource_list(&ctx->Const, prog);
ret = st_link_nir(ctx, prog);
return ret;

View File

@@ -293,6 +293,17 @@ shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
*align = comp_size * (length == 3 ? 4 : length);
}
static bool
st_can_remove_varying_before_linking(nir_variable *var, void *data)
{
bool *is_sso = (bool *) data;
if (*is_sso) {
/* Allow the removal of unused builtins in SSO */
return var->data.location > -1 && var->data.location < VARYING_SLOT_VAR0;
} else
return true;
}
/* First third of converting glsl_to_nir.. this leaves things in a pre-
* nir_lower_io state, so that shader variants can more easily insert/
* replace variables, etc.
@@ -342,15 +353,12 @@ st_nir_preprocess(struct st_context *st, struct gl_program *prog,
NIR_PASS_V(nir, st_nir_add_point_size);
}
/* ES has strict SSO validation rules for shader IO matching so we can't
* remove dead IO until the resource list has been built. Here we skip
* removing them until later. This will potentially make the IO lowering
* calls below do a little extra work but should otherwise have no impact.
*/
if (!_mesa_is_gles(st->ctx) || !nir->info.separate_shader) {
nir_variable_mode mask = nir_var_shader_in | nir_var_shader_out;
nir_remove_dead_variables(nir, mask, NULL);
}
struct nir_remove_dead_variables_options opts;
bool is_sso = nir->info.separate_shader;
opts.can_remove_var_data = &is_sso;
opts.can_remove_var = &st_can_remove_varying_before_linking;
nir_variable_mode mask = nir_var_shader_in | nir_var_shader_out;
nir_remove_dead_variables(nir, mask, &opts);
if (options->lower_all_io_to_temps ||
nir->info.stage == MESA_SHADER_VERTEX ||
@@ -737,15 +745,6 @@ st_link_nir(struct gl_context *ctx,
st_lower_patch_vertices_in(shader_program);
/* Linking the stages in the opposite order (from fragment to vertex)
* ensures that inter-shader outputs written to in an earlier stage
* are eliminated if they are (transitively) not used in a later
* stage.
*/
for (int i = num_shaders - 2; i >= 0; i--) {
st_nir_link_shaders(linked_shader[i]->Program->nir,
linked_shader[i + 1]->Program->nir);
}
/* Linking shaders also optimizes them. Separate shaders, compute shaders
* and shaders with a fixed-func VS or FS that don't need linking are
* optimized here.
@@ -754,15 +753,42 @@ st_link_nir(struct gl_context *ctx,
gl_nir_opts(linked_shader[0]->Program->nir);
if (shader_program->data->spirv) {
/* Linking the stages in the opposite order (from fragment to vertex)
* ensures that inter-shader outputs written to in an earlier stage
* are eliminated if they are (transitively) not used in a later
* stage.
*/
for (int i = num_shaders - 2; i >= 0; i--) {
st_nir_link_shaders(linked_shader[i]->Program->nir,
linked_shader[i + 1]->Program->nir);
}
static const gl_nir_linker_options opts = {
true /*fill_parameters */
};
if (!gl_nir_link_spirv(&ctx->Const, shader_program, &opts))
return GL_FALSE;
} else {
if (!gl_nir_link_glsl(&ctx->Const, &ctx->Extensions,
if (!gl_nir_link_glsl(&ctx->Const, &ctx->Extensions, ctx->API,
shader_program))
return GL_FALSE;
/* Linking the stages in the opposite order (from fragment to vertex)
* ensures that inter-shader outputs written to in an earlier stage
* are eliminated if they are (transitively) not used in a later
* stage.
*/
for (int i = num_shaders - 2; i >= 0; i--) {
st_nir_link_shaders(linked_shader[i]->Program->nir,
linked_shader[i + 1]->Program->nir);
}
/* Tidy up any left overs from the linking process for single shaders.
* For example varying arrays that get packed may have dead elements that
* can be now be eliminated now that array access has been lowered.
*/
if (num_shaders == 1)
gl_nir_opts(linked_shader[0]->Program->nir);
}
for (unsigned i = 0; i < num_shaders; i++) {

View File

@@ -78,7 +78,7 @@ traces:
- path: glmark2/bump:bump-render=height.trace
expectations:
- device: gl-panfrost-t860
checksum: 2efca26d7eb85d826276b30f3265153b
checksum: 73cb74446b0aefcfcf7b41d80eaed016
- path: glmark2/bump:bump-render=high-poly.trace
expectations:
- device: gl-panfrost-t860