zink: cap driver inlining using ssa allocation limit
Usually inlining is optimal for CPU drivers, since the majority of time is spent in the shaders and any amount of reduction to shader code will be optimal.

If, however, the shaders are still really big after inlining, this improvement will be negated by the insane amount of time spent doing stupid LLVM optimizer passes, so check the post-inline size to see whether it exceeds a size threshold.

lavapipe release build - 1700% improvement
* spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec4-index-rd-after-barrier
  before: 142.15s user 0.42s system 99% cpu 2:23.14 total
  after: 8.60s user 0.07s system 99% cpu 8.677 total

fixes #6647

Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16977>
This commit is contained in:

committed by
Marge Bot

parent
44223e5f28
commit
11e55bce49
@@ -20,8 +20,3 @@ glx@glx-shader-sharing
|
||||
spec@arb_fragment_program@no-newline
|
||||
# glx-destroycontext-1: ../../src/xcb_conn.c:215: write_vec: Assertion `!c->out.queue_len' failed.
|
||||
glx@glx-destroycontext-1
|
||||
|
||||
# #6647
|
||||
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec4-index-rd-after-barrier
|
||||
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec4-index-wr
|
||||
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec4-index-wr-before-barrier
|
||||
|
@@ -2010,6 +2010,10 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad
|
||||
/* This must be done again. */
|
||||
NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
|
||||
nir_var_shader_out);
|
||||
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
|
||||
zs->can_inline = false;
|
||||
} else if (need_optimize)
|
||||
optimize_nir(nir);
|
||||
prune_io(nir);
|
||||
@@ -2835,6 +2839,8 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
|
||||
}
|
||||
}
|
||||
|
||||
ret->can_inline = true;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@@ -38,6 +38,11 @@
|
||||
#define ZINK_WORKGROUP_SIZE_Y 2
|
||||
#define ZINK_WORKGROUP_SIZE_Z 3
|
||||
|
||||
/* stop inlining shaders if they have >limit ssa vals after inlining:
|
||||
* recompile time isn't worth the inline
|
||||
*/
|
||||
#define ZINK_ALWAYS_INLINE_LIMIT 1500
|
||||
|
||||
struct pipe_screen;
|
||||
struct zink_context;
|
||||
struct zink_screen;
|
||||
@@ -90,6 +95,7 @@ struct zink_shader {
|
||||
uint32_t ubos_used; // bitfield of which ubo indices are used
|
||||
uint32_t ssbos_used; // bitfield of which ssbo indices are used
|
||||
bool bindless;
|
||||
bool can_inline;
|
||||
struct spirv_shader *spirv;
|
||||
|
||||
simple_mtx_t lock;
|
||||
|
@@ -106,7 +106,7 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen
|
||||
}
|
||||
if (ctx && zs->nir->info.num_inlinable_uniforms &&
|
||||
ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(pstage)) {
|
||||
if (screen->is_cpu || prog->inlined_variant_count[pstage] < ZINK_MAX_INLINED_VARIANTS)
|
||||
if (zs->can_inline && (screen->is_cpu || prog->inlined_variant_count[pstage] < ZINK_MAX_INLINED_VARIANTS))
|
||||
inline_size = zs->nir->info.num_inlinable_uniforms;
|
||||
else
|
||||
key->inline_uniforms = false;
|
||||
|
Reference in New Issue
Block a user