zink: redo slot mapping again for the last time really I mean it

now that shader compiling is happening all at once, we can store the slot
map on zink_gfx_program directly and reserve it dynamically in order to
use up only the slots that are actually being used across all shader stages

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7100>
This commit is contained in:
Mike Blumenkrantz
2020-06-30 15:10:12 -04:00
committed by Marge Bot
parent 4f144dc92c
commit f85488ab82
6 changed files with 64 additions and 95 deletions

View File

@@ -29,65 +29,7 @@
#include "util/u_memory.h"
#include "util/hash_table.h"
/* this consistently maps slots to a zero-indexed value to avoid wasting slots */
static unsigned slot_pack_map[] = {
/* Position is builtin */
[VARYING_SLOT_POS] = UINT_MAX,
[VARYING_SLOT_COL0] = 0, /* input/output */
[VARYING_SLOT_COL1] = 1, /* input/output */
[VARYING_SLOT_FOGC] = 2, /* input/output */
/* TEX0-7 are deprecated, so we put them at the end of the range and hope nobody uses them all */
[VARYING_SLOT_TEX0] = VARYING_SLOT_VAR0 - 1, /* input/output */
[VARYING_SLOT_TEX1] = VARYING_SLOT_VAR0 - 2,
[VARYING_SLOT_TEX2] = VARYING_SLOT_VAR0 - 3,
[VARYING_SLOT_TEX3] = VARYING_SLOT_VAR0 - 4,
[VARYING_SLOT_TEX4] = VARYING_SLOT_VAR0 - 5,
[VARYING_SLOT_TEX5] = VARYING_SLOT_VAR0 - 6,
[VARYING_SLOT_TEX6] = VARYING_SLOT_VAR0 - 7,
[VARYING_SLOT_TEX7] = VARYING_SLOT_VAR0 - 8,
/* PointSize is builtin */
[VARYING_SLOT_PSIZ] = UINT_MAX,
[VARYING_SLOT_BFC0] = 3, /* output only */
[VARYING_SLOT_BFC1] = 4, /* output only */
[VARYING_SLOT_EDGE] = 5, /* output only */
[VARYING_SLOT_CLIP_VERTEX] = 6, /* output only */
/* ClipDistance is builtin */
[VARYING_SLOT_CLIP_DIST0] = UINT_MAX,
[VARYING_SLOT_CLIP_DIST1] = UINT_MAX,
/* CullDistance is builtin */
[VARYING_SLOT_CULL_DIST0] = UINT_MAX, /* input/output */
[VARYING_SLOT_CULL_DIST1] = UINT_MAX, /* never actually used */
/* PrimitiveId is builtin */
[VARYING_SLOT_PRIMITIVE_ID] = UINT_MAX,
/* Layer is builtin */
[VARYING_SLOT_LAYER] = UINT_MAX, /* input/output */
/* ViewportIndex is builtin */
[VARYING_SLOT_VIEWPORT] = UINT_MAX, /* input/output */
/* FrontFacing is builtin */
[VARYING_SLOT_FACE] = UINT_MAX,
/* PointCoord is builtin */
[VARYING_SLOT_PNTC] = UINT_MAX, /* input only */
/* TessLevelOuter is builtin */
[VARYING_SLOT_TESS_LEVEL_OUTER] = UINT_MAX,
/* TessLevelInner is builtin */
[VARYING_SLOT_TESS_LEVEL_INNER] = UINT_MAX,
[VARYING_SLOT_BOUNDING_BOX0] = 7, /* Only appears as TCS output. */
[VARYING_SLOT_BOUNDING_BOX1] = 8, /* Only appears as TCS output. */
[VARYING_SLOT_VIEW_INDEX] = 9, /* input/output */
[VARYING_SLOT_VIEWPORT_MASK] = 10, /* output only */
};
#define NTV_MIN_RESERVED_SLOTS 11
#define SLOT_UNSET ((unsigned char) -1)
struct ntv_context {
void *mem_ctx;
@@ -123,10 +65,10 @@ struct ntv_context {
bool block_started;
SpvId loop_break, loop_cont;
unsigned char *shader_slot_map;
unsigned char shader_slots_reserved;
SpvId front_face_var, instance_id_var, vertex_id_var;
#ifndef NDEBUG
bool seen_texcoord[8]; //whether we've seen a VARYING_SLOT_TEX[n] this pass
#endif
};
static SpvId
@@ -295,25 +237,24 @@ get_glsl_type(struct ntv_context *ctx, const struct glsl_type *type)
unreachable("we shouldn't get here, I think...");
}
static inline unsigned char
reserve_slot(struct ntv_context *ctx)
{
/* TODO: this should actually be clamped to the limits value as in the table
* in 14.1.4 of the vulkan spec, though there's not really any recourse
* other than aborting if we do hit it...
*/
assert(ctx->shader_slots_reserved < MAX_VARYING);
return ctx->shader_slots_reserved++;
}
static inline unsigned
handle_slot(struct ntv_context *ctx, unsigned slot)
{
unsigned orig = slot;
if (slot < VARYING_SLOT_VAR0) {
#ifndef NDEBUG
if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7)
ctx->seen_texcoord[slot - VARYING_SLOT_TEX0] = true;
#endif
slot = slot_pack_map[slot];
if (slot == UINT_MAX)
debug_printf("unhandled varying slot: %s\n", gl_varying_slot_name(orig));
} else {
slot -= VARYING_SLOT_VAR0 - NTV_MIN_RESERVED_SLOTS;
assert(slot <= VARYING_SLOT_VAR0 - 8 ||
!ctx->seen_texcoord[VARYING_SLOT_VAR0 - slot - 1]);
}
assert(slot < VARYING_SLOT_VAR0);
if (ctx->shader_slot_map[slot] == SLOT_UNSET)
ctx->shader_slot_map[slot] = reserve_slot(ctx);
slot = ctx->shader_slot_map[slot];
assert(slot < MAX_VARYING);
return slot;
}
@@ -901,8 +842,7 @@ get_output_type(struct ntv_context *ctx, unsigned register_index, unsigned num_c
/* for streamout create new outputs, as streamout can be done on individual components,
from complete outputs, so we just can't use the created packed outputs */
static void
emit_so_info(struct ntv_context *ctx, unsigned max_output_location,
const struct zink_so_info *so_info)
emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info)
{
for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) {
struct pipe_stream_output so_output = so_info->so_info.output[i];
@@ -924,16 +864,9 @@ emit_so_info(struct ntv_context *ctx, unsigned max_output_location,
/* output location is incremented by VARYING_SLOT_VAR0 for non-builtins in vtn,
* so we need to ensure that the new xfb location slot doesn't conflict with any previously-emitted
* outputs.
*
* if there's no previous outputs that take up user slots (VAR0+) then we can start right after the
* glsl builtin reserved slots, otherwise we start just after the adjusted user output slot
*/
uint32_t location = NTV_MIN_RESERVED_SLOTS + i;
if (max_output_location >= VARYING_SLOT_VAR0)
location = max_output_location - VARYING_SLOT_VAR0 + 1 + i;
uint32_t location = reserve_slot(ctx);
assert(location < VARYING_SLOT_VAR0);
assert(location <= VARYING_SLOT_VAR0 - 8 ||
!ctx->seen_texcoord[VARYING_SLOT_VAR0 - location - 1]);
spirv_builder_emit_location(&ctx->builder, var_id, location);
/* note: gl_ClipDistance[4] can the 0-indexed member of VARYING_SLOT_CLIP_DIST1 here,
@@ -2236,7 +2169,8 @@ emit_cf_list(struct ntv_context *ctx, struct exec_list *list)
}
struct spirv_shader *
nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info,
unsigned char *shader_slot_map, unsigned char *shader_slots_reserved)
{
struct spirv_shader *ret = NULL;
@@ -2279,6 +2213,8 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
}
ctx.stage = s->info.stage;
ctx.shader_slot_map = shader_slot_map;
ctx.shader_slots_reserved = *shader_slots_reserved;
ctx.GLSL_std_450 = spirv_builder_import(&ctx.builder, "GLSL.std.450");
spirv_builder_emit_source(&ctx.builder, SpvSourceLanguageGLSL, 450);
@@ -2329,7 +2265,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
if (so_info)
emit_so_info(&ctx, util_last_bit64(s->info.outputs_written), so_info);
emit_so_info(&ctx, so_info);
/* we have to reverse iterate to match what's done in zink_compiler.c */
foreach_list_typed_reverse(nir_variable, var, node, &s->variables)
if (_nir_shader_variable_has_mode(var, nir_var_uniform |
@@ -2421,6 +2357,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
assert(ret->num_words == num_words);
ralloc_free(ctx.mem_ctx);
*shader_slots_reserved = ctx.shader_slots_reserved;
return ret;

View File

@@ -42,7 +42,8 @@ struct nir_shader;
struct pipe_stream_output_info;
struct spirv_shader *
nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info);
nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info,
unsigned char *shader_slot_map, unsigned char *shader_slots_reserved);
void
spirv_shader_delete(struct spirv_shader *s);

View File

@@ -217,11 +217,12 @@ update_so_info(struct zink_shader *sh,
}
VkShaderModule
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs)
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
unsigned char *shader_slot_map, unsigned char *shader_slots_reserved)
{
VkShaderModule mod = VK_NULL_HANDLE;
void *streamout = zs->streamout.so_info_slots ? &zs->streamout : NULL;
struct spirv_shader *spirv = nir_to_spirv(zs->nir, streamout);
struct spirv_shader *spirv = nir_to_spirv(zs->nir, streamout, shader_slot_map, shader_slots_reserved);
assert(spirv);
if (zink_debug & ZINK_DEBUG_SPIRV) {

View File

@@ -72,7 +72,8 @@ struct zink_shader {
};
VkShaderModule
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs);
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
unsigned char *shader_slot_map, unsigned char *shader_slots_reserved);
struct zink_shader *
zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,

View File

@@ -146,13 +146,15 @@ update_shader_modules(struct zink_context *ctx, struct zink_shader *stages[ZINK_
unsigned type = u_bit_scan(&dirty_shader_stages);
dirty[tgsi_processor_to_shader_stage(type)] = stages[type];
}
for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
enum pipe_shader_type type = pipe_shader_type_from_mesa(i);
if (dirty[i]) {
prog->modules[type] = CALLOC_STRUCT(zink_shader_module);
assert(prog->modules[type]);
pipe_reference_init(&prog->modules[type]->reference, 1);
prog->modules[type]->shader = zink_shader_compile(zink_screen(ctx->base.screen), dirty[i]);
prog->modules[type]->shader = zink_shader_compile(zink_screen(ctx->base.screen), dirty[i],
prog->shader_slot_map, &prog->shader_slots_reserved);
} else if (stages[type]) /* reuse existing shader module */
zink_shader_module_reference(zink_screen(ctx->base.screen), &prog->modules[type], ctx->curr_program->modules[type]);
prog->shaders[type] = stages[type];
@@ -172,6 +174,28 @@ equals_gfx_pipeline_state(const void *a, const void *b)
return memcmp(a, b, offsetof(struct zink_gfx_pipeline_state, hash)) == 0;
}
static void
init_slot_map(struct zink_context *ctx, struct zink_gfx_program *prog)
{
unsigned existing_shaders = 0;
/* if there's a case where we'll be reusing any shaders, we need to reuse the slot map too */
if (ctx->curr_program) {
for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
if (ctx->curr_program->shaders[i])
existing_shaders |= 1 << i;
}
}
if (ctx->dirty_shader_stages == existing_shaders || !existing_shaders)
/* all shaders are being recompiled: new slot map */
memset(prog->shader_slot_map, -1, sizeof(prog->shader_slot_map));
else {
/* at least some shaders are being reused: use existing slot map so locations match up */
memcpy(prog->shader_slot_map, ctx->curr_program->shader_slot_map, sizeof(prog->shader_slot_map));
prog->shader_slots_reserved = ctx->curr_program->shader_slots_reserved;
}
}
struct zink_gfx_program *
zink_create_gfx_program(struct zink_context *ctx,
struct zink_shader *stages[ZINK_SHADER_COUNT])
@@ -183,6 +207,8 @@ zink_create_gfx_program(struct zink_context *ctx,
pipe_reference_init(&prog->reference, 1);
init_slot_map(ctx, prog);
update_shader_modules(ctx, stages, prog);
for (int i = 0; i < ARRAY_SIZE(prog->pipelines); ++i) {

View File

@@ -26,6 +26,7 @@
#include <vulkan/vulkan.h>
#include "compiler/shader_enums.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
@@ -48,6 +49,8 @@ struct zink_gfx_program {
struct zink_shader_module *modules[ZINK_SHADER_COUNT]; // compute stage doesn't belong here
struct zink_shader *shaders[ZINK_SHADER_COUNT];
unsigned char shader_slot_map[VARYING_SLOT_MAX];
unsigned char shader_slots_reserved;
VkDescriptorSetLayout dsl;
VkPipelineLayout layout;
unsigned num_descriptors;