panfrost: Implement a disk cache
Wire up the Mesa shader disk cache into Panfrost. Coupled with the precompiles from the previous patch, this should greatly reduce shader recompile jank.

This is a bare-bones implementation. Obvious future work includes:

- Caching internal (outside of Gallium) shaders
- Implementing finalize_nir to reduce the on-disk size of shaders

That doesn't need to come in this patch. This patch does shuffle some allocation patterns around to avoid extra nir_shader_clones, but the result should be pretty clean.

---

Consider dEQP-GLES31.functional.ssbo.layout.basic_unsized_array.* in the CTS. With a cold cache:

44.11user 0.66system 0:45.44elapsed 98%CPU (0avgtext+0avgdata 267804maxresident)k
0inputs+0outputs (130major+74725minor)pagefaults 0swaps

But with this commit and a warm cache:

4.07user 0.35system 0:04.56elapsed 96%CPU (0avgtext+0avgdata 211012maxresident)k
0inputs+0outputs (1major+49489minor)pagefaults 0swaps

That's an 11x improvement!

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19363>
This commit is contained in:

committed by
Marge Bot

parent
b35a55bb42
commit
40372bd720
@@ -20,6 +20,7 @@
|
||||
# SOFTWARE.
|
||||
|
||||
files_panfrost = files(
|
||||
'pan_disk_cache.c',
|
||||
'pan_helpers.c',
|
||||
'pan_public.h',
|
||||
'pan_screen.c',
|
||||
|
@@ -51,6 +51,7 @@
|
||||
#include "pan_util.h"
|
||||
#include "decode.h"
|
||||
#include "util/pan_lower_framebuffer.h"
|
||||
#include "compiler/nir/nir_serialize.h"
|
||||
|
||||
static void
|
||||
panfrost_clear(
|
||||
|
@@ -279,8 +279,18 @@ struct panfrost_fs_key {
|
||||
};
|
||||
|
||||
struct panfrost_shader_key {
|
||||
/* If we need vertex shader keys, union it in */
|
||||
struct panfrost_fs_key fs;
|
||||
union {
|
||||
/* Vertex shaders do not use shader keys. However, we have a
|
||||
* special "transform feedback" vertex program derived from a
|
||||
* vertex shader. If vs_is_xfb is set on a vertex shader, this
|
||||
* is a transform feedback shader, else it is a regular
|
||||
* (unkeyed) vertex shader.
|
||||
*/
|
||||
bool vs_is_xfb;
|
||||
|
||||
/* Fragment shaders use regular shader keys */
|
||||
struct panfrost_fs_key fs;
|
||||
};
|
||||
};
|
||||
|
||||
struct panfrost_compiled_shader {
|
||||
@@ -308,7 +318,14 @@ struct panfrost_compiled_shader {
|
||||
|
||||
/* Shader CSO */
|
||||
struct panfrost_uncompiled_shader {
|
||||
nir_shader *nir;
|
||||
/* NIR for the shader. For graphics, this will be non-NULL even for
|
||||
* TGSI. For compute, this will be NULL after the shader is compiled,
|
||||
* as we don't need any compute variants.
|
||||
*/
|
||||
const nir_shader *nir;
|
||||
|
||||
/* A SHA1 of the serialized NIR for the disk cache. */
|
||||
unsigned char nir_sha1[20];
|
||||
|
||||
/* Stream output information */
|
||||
struct pipe_stream_output_info stream_output;
|
||||
@@ -329,6 +346,35 @@ struct panfrost_uncompiled_shader {
|
||||
uint32_t fixed_varying_mask;
|
||||
};
|
||||
|
||||
/* The binary artefacts of compiling a shader. This differs from
|
||||
* panfrost_compiled_shader, which adds extra metadata beyond compiling but
|
||||
* throws away information not needed after the initial compile.
|
||||
*
|
||||
* This structure is serialized for the shader disk cache.
|
||||
*/
|
||||
struct panfrost_shader_binary {
|
||||
/* Collected information about the compiled shader */
|
||||
struct pan_shader_info info;
|
||||
|
||||
/* The binary itself */
|
||||
struct util_dynarray binary;
|
||||
};
|
||||
|
||||
void
|
||||
panfrost_disk_cache_store(struct disk_cache *cache,
|
||||
const struct panfrost_uncompiled_shader *uncompiled,
|
||||
const struct panfrost_shader_key *key,
|
||||
const struct panfrost_shader_binary *binary);
|
||||
|
||||
bool
|
||||
panfrost_disk_cache_retrieve(struct disk_cache *cache,
|
||||
const struct panfrost_uncompiled_shader *uncompiled,
|
||||
const struct panfrost_shader_key *key,
|
||||
struct panfrost_shader_binary *binary);
|
||||
|
||||
void
|
||||
panfrost_disk_cache_init(struct panfrost_screen *screen);
|
||||
|
||||
/** (Vertex buffer index, divisor) tuple that will become an Attribute Buffer
|
||||
* Descriptor at draw-time on Midgard
|
||||
*/
|
||||
|
175
src/gallium/drivers/panfrost/pan_disk_cache.c
Normal file
175
src/gallium/drivers/panfrost/pan_disk_cache.c
Normal file
@@ -0,0 +1,175 @@
|
||||
/*
|
||||
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
|
||||
* Copyright © 2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "compiler/nir/nir.h"
#include "util/blob.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"

#include "pan_context.h"
|
||||
|
||||
static bool debug = false;
|
||||
|
||||
extern int midgard_debug;
|
||||
extern int bifrost_debug;
|
||||
|
||||
/**
|
||||
* Compute a disk cache key for the given uncompiled shader and shader key.
|
||||
*/
|
||||
static void
|
||||
panfrost_disk_cache_compute_key(struct disk_cache *cache,
|
||||
const struct panfrost_uncompiled_shader *uncompiled,
|
||||
const struct panfrost_shader_key *shader_key,
|
||||
cache_key cache_key)
|
||||
{
|
||||
uint8_t data[sizeof(uncompiled->nir_sha1) + sizeof(*shader_key)];
|
||||
|
||||
memcpy(data, uncompiled->nir_sha1, sizeof(uncompiled->nir_sha1));
|
||||
memcpy(data + sizeof(uncompiled->nir_sha1), shader_key, sizeof(*shader_key));
|
||||
|
||||
disk_cache_compute_key(cache, data, sizeof(data), cache_key);
|
||||
}
|
||||
|
||||
/**
 * Write a freshly compiled shader into the disk cache.
 *
 * Does nothing when there is no cache (cache == NULL) or when the driver was
 * built without ENABLE_SHADER_CACHE. Callers should only pass shaders that
 * were just compiled: nothing here guards against storing the same shader
 * more than once.
 */
void
panfrost_disk_cache_store(struct disk_cache *cache,
                          const struct panfrost_uncompiled_shader *uncompiled,
                          const struct panfrost_shader_key *key,
                          const struct panfrost_shader_binary *binary)
{
#ifdef ENABLE_SHADER_CACHE
   if (cache == NULL)
      return;

   cache_key hash;
   panfrost_disk_cache_compute_key(cache, uncompiled, key, hash);

   if (debug) {
      char hex[41];

      _mesa_sha1_format(hex, hash);
      fprintf(stderr, "[mesa disk cache] storing %s\n", hex);
   }

   const struct util_dynarray *code = &binary->binary;

   struct blob payload;
   blob_init(&payload);

   /* Layout of a cache entry, in order:
    *
    *    - uint32: size of the program binary in bytes
    *    - the program binary
    *    - the pan_shader_info
    *
    * panfrost_disk_cache_retrieve reads the entry back in the same order.
    */
   blob_write_uint32(&payload, code->size);
   blob_write_bytes(&payload, code->data, code->size);
   blob_write_bytes(&payload, &binary->info, sizeof(binary->info));

   disk_cache_put(cache, hash, payload.data, payload.size, NULL);
   blob_finish(&payload);
#endif
}
|
||||
|
||||
/**
 * Search for a compiled shader in the disk cache.
 *
 * @param cache       Disk cache to query; may be NULL, which is a miss.
 * @param uncompiled  Shader CSO whose nir_sha1 contributes to the cache key.
 * @param key         Variant key, also part of the cache key.
 * @param binary      Output. On a hit, binary->binary is initialized and
 *                    filled with the program and binary->info with the
 *                    stored shader info; the caller then owns the dynarray
 *                    and must release it with util_dynarray_fini().
 *
 * @return true on a hit. false if there is no cache, the entry is missing,
 *         the entry is truncated, or the driver was built without
 *         ENABLE_SHADER_CACHE. On a truncated entry binary->binary is left
 *         empty and binary->info is left untouched, so the caller can fall
 *         back to compiling.
 */
bool
panfrost_disk_cache_retrieve(struct disk_cache *cache,
                             const struct panfrost_uncompiled_shader *uncompiled,
                             const struct panfrost_shader_key *key,
                             struct panfrost_shader_binary *binary)
{
#ifdef ENABLE_SHADER_CACHE
   if (!cache)
      return false;

   cache_key cache_key;
   panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);

   if (debug) {
      char sha1[41];
      _mesa_sha1_format(sha1, cache_key);
      fprintf(stderr, "[mesa disk cache] retrieving %s: ", sha1);
   }

   size_t size;
   void *buffer = disk_cache_get(cache, cache_key, &size);

   if (debug)
      fprintf(stderr, "%s\n", buffer ? "found" : "missing");

   if (!buffer)
      return false;

   struct blob_reader blob;
   blob_reader_init(&blob, buffer, size);

   util_dynarray_init(&binary->binary, NULL);

   /* Entries are laid out as written by panfrost_disk_cache_store: the
    * binary size, the binary, then the shader info.
    */
   uint32_t binary_size = blob_read_uint32(&blob);
   void *ptr = util_dynarray_resize_bytes(&binary->binary, binary_size, 1);

   /* A NULL pointer is only acceptable for an empty binary; otherwise the
    * allocation failed and there is nowhere to copy into.
    */
   bool valid = (ptr != NULL) || (binary_size == 0);

   if (valid) {
      blob_copy_bytes(&blob, ptr, binary_size);
      blob_copy_bytes(&blob, &binary->info, sizeof(binary->info));

      /* The reader flags any read past the end of the entry. Treat such a
       * truncated entry as a miss rather than handing back garbage.
       */
      valid = !blob.overrun;
   }

   /* disk_cache_get hands us a malloc'd copy of the entry. Everything we
    * need has been copied out, so release it on every path.
    */
   free(buffer);

   if (!valid) {
      util_dynarray_fini(&binary->binary);
      return false;
   }

   return true;
#else
   return false;
#endif
}
|
||||
|
||||
/**
 * Create the on-disk shader cache for a screen and store it in
 * screen->disk_cache.
 *
 * The cache is identified by the renderer name reported by the screen and by
 * the build ID of the binary containing this function. The debug flags of
 * the device and of both compiler backends are passed as driver flags, since
 * they can affect the compile. Does nothing when built without
 * ENABLE_SHADER_CACHE.
 */
void
panfrost_disk_cache_init(struct panfrost_screen *screen)
{
#ifdef ENABLE_SHADER_CACHE
   const char *gpu_name = screen->base.get_name(&screen->base);

   const struct build_id_note *build_note =
      build_id_find_nhdr_for_addr(panfrost_disk_cache_init);
   assert(build_note && build_id_length(build_note) == 20); /* sha1 */

   const uint8_t *build_sha1 = build_id_data(build_note);
   assert(build_sha1);

   /* Hex-formatted build ID, used as the cache's driver identifier */
   char driver_id[41];
   _mesa_sha1_format(driver_id, build_sha1);

   /* Device debug flags go in the low half, the combined Midgard/Bifrost
    * compiler debug flags are shifted into the high half.
    */
   const uint64_t compiler_debug = (uint64_t) (midgard_debug | bifrost_debug);
   const uint64_t driver_flags =
      (uint64_t) screen->dev.debug | (compiler_debug << 32);

   screen->disk_cache = disk_cache_create(gpu_name, driver_id, driver_flags);
#endif
}
|
@@ -753,6 +753,8 @@ panfrost_destroy_screen(struct pipe_screen *pscreen)
|
||||
if (dev->ro)
|
||||
dev->ro->destroy(dev->ro);
|
||||
panfrost_close_device(dev);
|
||||
|
||||
disk_cache_destroy(screen->disk_cache);
|
||||
ralloc_free(pscreen);
|
||||
}
|
||||
|
||||
@@ -853,6 +855,12 @@ panfrost_screen_get_compiler_options(struct pipe_screen *pscreen,
|
||||
return pan_screen(pscreen)->vtbl.get_compiler_options();
|
||||
}
|
||||
|
||||
static struct disk_cache *
|
||||
panfrost_get_disk_shader_cache(struct pipe_screen *pscreen)
|
||||
{
|
||||
return pan_screen(pscreen)->disk_cache;
|
||||
}
|
||||
|
||||
struct pipe_screen *
|
||||
panfrost_create_screen(int fd, struct renderonly *ro)
|
||||
{
|
||||
@@ -896,12 +904,16 @@ panfrost_create_screen(int fd, struct renderonly *ro)
|
||||
panfrost_is_dmabuf_modifier_supported;
|
||||
screen->base.context_create = panfrost_create_context;
|
||||
screen->base.get_compiler_options = panfrost_screen_get_compiler_options;
|
||||
screen->base.get_disk_shader_cache = panfrost_get_disk_shader_cache;
|
||||
screen->base.fence_reference = panfrost_fence_reference;
|
||||
screen->base.fence_finish = panfrost_fence_finish;
|
||||
screen->base.set_damage_region = panfrost_resource_set_damage_region;
|
||||
|
||||
panfrost_resource_screen_init(&screen->base);
|
||||
pan_blend_shaders_init(dev);
|
||||
|
||||
panfrost_disk_cache_init(screen);
|
||||
|
||||
panfrost_pool_init(&screen->indirect_draw.bin_pool, NULL, dev,
|
||||
PAN_BO_EXECUTE, 65536, "Indirect draw shaders",
|
||||
false, true);
|
||||
|
@@ -37,6 +37,7 @@
|
||||
#include "util/bitset.h"
|
||||
#include "util/set.h"
|
||||
#include "util/log.h"
|
||||
#include "util/disk_cache.h"
|
||||
|
||||
#include "pan_device.h"
|
||||
#include "pan_mempool.h"
|
||||
@@ -107,6 +108,7 @@ struct panfrost_screen {
|
||||
} indirect_draw;
|
||||
|
||||
struct panfrost_vtable vtbl;
|
||||
struct disk_cache *disk_cache;
|
||||
};
|
||||
|
||||
static inline struct panfrost_screen *
|
||||
|
@@ -37,12 +37,26 @@
|
||||
#include "nir_serialize.h"
|
||||
|
||||
static struct panfrost_uncompiled_shader *
|
||||
panfrost_alloc_shader(void)
|
||||
panfrost_alloc_shader(const nir_shader *nir)
|
||||
{
|
||||
struct panfrost_uncompiled_shader *so = CALLOC_STRUCT(panfrost_uncompiled_shader);
|
||||
struct panfrost_uncompiled_shader *so =
|
||||
rzalloc(NULL, struct panfrost_uncompiled_shader);
|
||||
|
||||
simple_mtx_init(&so->lock, mtx_plain);
|
||||
util_dynarray_init(&so->variants, NULL);
|
||||
util_dynarray_init(&so->variants, so);
|
||||
|
||||
so->nir = nir;
|
||||
|
||||
/* Serialize the NIR to a binary blob that we can hash for the disk
|
||||
* cache. Drop unnecessary information (like variable names) so the
|
||||
* serialized NIR is smaller, and also to let us detect more isomorphic
|
||||
* shaders when hashing, increasing cache hits.
|
||||
*/
|
||||
struct blob blob;
|
||||
blob_init(&blob);
|
||||
nir_serialize(&blob, nir, true);
|
||||
_mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
|
||||
blob_finish(&blob);
|
||||
|
||||
return so;
|
||||
}
|
||||
@@ -54,17 +68,15 @@ panfrost_alloc_variant(struct panfrost_uncompiled_shader *so)
|
||||
}
|
||||
|
||||
static void
|
||||
panfrost_shader_compile(struct pipe_screen *pscreen,
|
||||
struct panfrost_pool *shader_pool,
|
||||
struct panfrost_pool *desc_pool,
|
||||
panfrost_shader_compile(struct panfrost_screen *screen,
|
||||
const nir_shader *ir,
|
||||
struct util_debug_callback *dbg,
|
||||
struct panfrost_compiled_shader *state,
|
||||
struct panfrost_shader_key *key,
|
||||
unsigned req_local_mem,
|
||||
unsigned fixed_varying_mask)
|
||||
unsigned fixed_varying_mask,
|
||||
struct panfrost_shader_binary *out)
|
||||
{
|
||||
struct panfrost_screen *screen = pan_screen(pscreen);
|
||||
struct panfrost_device *dev = pan_device(pscreen);
|
||||
struct panfrost_device *dev = pan_device(&screen->base);
|
||||
|
||||
nir_shader *s = nir_shader_clone(NULL, ir);
|
||||
|
||||
@@ -76,27 +88,27 @@ panfrost_shader_compile(struct pipe_screen *pscreen,
|
||||
|
||||
/* Lower this early so the backends don't have to worry about it */
|
||||
if (s->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
inputs.fixed_varying_mask = state->key.fs.fixed_varying_mask;
|
||||
inputs.fixed_varying_mask = key->fs.fixed_varying_mask;
|
||||
|
||||
if (s->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
|
||||
NIR_PASS_V(s, nir_lower_fragcolor,
|
||||
state->key.fs.nr_cbufs_for_fragcolor);
|
||||
key->fs.nr_cbufs_for_fragcolor);
|
||||
}
|
||||
|
||||
if (state->key.fs.sprite_coord_enable) {
|
||||
if (key->fs.sprite_coord_enable) {
|
||||
NIR_PASS_V(s, nir_lower_texcoord_replace,
|
||||
state->key.fs.sprite_coord_enable,
|
||||
key->fs.sprite_coord_enable,
|
||||
true /* point coord is sysval */,
|
||||
false /* Y-invert */);
|
||||
}
|
||||
|
||||
if (state->key.fs.clip_plane_enable) {
|
||||
if (key->fs.clip_plane_enable) {
|
||||
NIR_PASS_V(s, nir_lower_clip_fs,
|
||||
state->key.fs.clip_plane_enable,
|
||||
key->fs.clip_plane_enable,
|
||||
false);
|
||||
}
|
||||
|
||||
memcpy(inputs.rt_formats, state->key.fs.rt_formats, sizeof(inputs.rt_formats));
|
||||
memcpy(inputs.rt_formats, key->fs.rt_formats, sizeof(inputs.rt_formats));
|
||||
} else if (s->info.stage == MESA_SHADER_VERTEX) {
|
||||
inputs.fixed_varying_mask = fixed_varying_mask;
|
||||
|
||||
@@ -104,41 +116,67 @@ panfrost_shader_compile(struct pipe_screen *pscreen,
|
||||
inputs.no_idvs = s->info.has_transform_feedback_varyings;
|
||||
}
|
||||
|
||||
struct util_dynarray binary;
|
||||
util_dynarray_init(&out->binary, NULL);
|
||||
screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info);
|
||||
|
||||
util_dynarray_init(&binary, NULL);
|
||||
screen->vtbl.compile_shader(s, &inputs, &binary, &state->info);
|
||||
assert(req_local_mem >= out->info.wls_size);
|
||||
out->info.wls_size = req_local_mem;
|
||||
|
||||
assert(req_local_mem >= state->info.wls_size);
|
||||
state->info.wls_size = req_local_mem;
|
||||
/* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
|
||||
* a NULL context
|
||||
*/
|
||||
ralloc_free(s);
|
||||
}
|
||||
|
||||
if (binary.size) {
|
||||
state->bin = panfrost_pool_take_ref(shader_pool,
|
||||
pan_pool_upload_aligned(&shader_pool->base,
|
||||
binary.data, binary.size, 128));
|
||||
static void
|
||||
panfrost_shader_get(struct pipe_screen *pscreen,
|
||||
struct panfrost_pool *shader_pool,
|
||||
struct panfrost_pool *desc_pool,
|
||||
struct panfrost_uncompiled_shader *uncompiled,
|
||||
struct util_debug_callback *dbg,
|
||||
struct panfrost_compiled_shader *state,
|
||||
unsigned req_local_mem)
|
||||
{
|
||||
struct panfrost_screen *screen = pan_screen(pscreen);
|
||||
struct panfrost_device *dev = pan_device(pscreen);
|
||||
|
||||
struct panfrost_shader_binary res = { 0 };
|
||||
|
||||
/* Try to retrieve the variant from the disk cache. If that fails,
|
||||
* compile a new variant and store in the disk cache for later reuse.
|
||||
*/
|
||||
if (!panfrost_disk_cache_retrieve(screen->disk_cache, uncompiled, &state->key, &res)) {
|
||||
panfrost_shader_compile(screen, uncompiled->nir, dbg, &state->key,
|
||||
req_local_mem,
|
||||
uncompiled->fixed_varying_mask, &res);
|
||||
|
||||
panfrost_disk_cache_store(screen->disk_cache, uncompiled, &state->key, &res);
|
||||
}
|
||||
|
||||
state->info = res.info;
|
||||
|
||||
if (res.binary.size) {
|
||||
state->bin = panfrost_pool_take_ref(shader_pool,
|
||||
pan_pool_upload_aligned(&shader_pool->base,
|
||||
res.binary.data, res.binary.size, 128));
|
||||
}
|
||||
|
||||
util_dynarray_fini(&res.binary);
|
||||
|
||||
/* Don't upload RSD for fragment shaders since they need draw-time
|
||||
* merging for e.g. depth/stencil/alpha. RSDs are replaced by simpler
|
||||
* shader program descriptors on Valhall, which can be preuploaded even
|
||||
* for fragment shaders. */
|
||||
bool upload = !(s->info.stage == MESA_SHADER_FRAGMENT && dev->arch <= 7);
|
||||
bool upload = !(uncompiled->nir->info.stage == MESA_SHADER_FRAGMENT && dev->arch <= 7);
|
||||
screen->vtbl.prepare_shader(state, desc_pool, upload);
|
||||
|
||||
panfrost_analyze_sysvals(state);
|
||||
|
||||
util_dynarray_fini(&binary);
|
||||
|
||||
/* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
|
||||
* a NULL context */
|
||||
ralloc_free(s);
|
||||
}
|
||||
|
||||
static void
|
||||
panfrost_build_key(struct panfrost_context *ctx,
|
||||
struct panfrost_shader_key *key,
|
||||
nir_shader *nir)
|
||||
const nir_shader *nir)
|
||||
{
|
||||
/* We don't currently have vertex shader variants */
|
||||
if (nir->info.stage != MESA_SHADER_FRAGMENT)
|
||||
@@ -237,10 +275,8 @@ panfrost_new_variant_locked(
|
||||
.stream_output = uncompiled->stream_output,
|
||||
};
|
||||
|
||||
panfrost_shader_compile(ctx->base.screen,
|
||||
&ctx->shaders, &ctx->descs, uncompiled->nir,
|
||||
&ctx->base.debug, prog, 0,
|
||||
uncompiled->fixed_varying_mask);
|
||||
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs,
|
||||
uncompiled, &ctx->base.debug, prog, 0);
|
||||
|
||||
/* Fixup the stream out information */
|
||||
prog->so_mask =
|
||||
@@ -333,14 +369,19 @@ panfrost_create_shader_state(
|
||||
struct pipe_context *pctx,
|
||||
const struct pipe_shader_state *cso)
|
||||
{
|
||||
struct panfrost_uncompiled_shader *so = panfrost_alloc_shader();
|
||||
nir_shader *nir = (cso->type == PIPE_SHADER_IR_TGSI) ?
|
||||
tgsi_to_nir(cso->tokens, pctx->screen, false) :
|
||||
cso->ir.nir;
|
||||
|
||||
struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(nir);
|
||||
|
||||
/* The driver gets ownership of the nir_shader for graphics. The NIR is
|
||||
* ralloc'd. Free the NIR when we free the uncompiled shader.
|
||||
*/
|
||||
ralloc_steal(so, nir);
|
||||
|
||||
so->stream_output = cso->stream_output;
|
||||
|
||||
if (cso->type == PIPE_SHADER_IR_TGSI)
|
||||
so->nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
|
||||
else
|
||||
so->nir = cso->ir.nir;
|
||||
so->nir = nir;
|
||||
|
||||
/* Fix linkage early */
|
||||
if (so->nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
@@ -353,7 +394,6 @@ panfrost_create_shader_state(
|
||||
* feedback program. This is a special shader variant.
|
||||
*/
|
||||
struct panfrost_context *ctx = pan_context(pctx);
|
||||
struct util_debug_callback *dbg = &ctx->base.debug;
|
||||
|
||||
if (so->nir->xfb_info) {
|
||||
nir_shader *xfb = nir_shader_clone(NULL, so->nir);
|
||||
@@ -361,14 +401,15 @@ panfrost_create_shader_state(
|
||||
xfb->info.internal = true;
|
||||
|
||||
so->xfb = calloc(1, sizeof(struct panfrost_compiled_shader));
|
||||
panfrost_shader_compile(pctx->screen, &ctx->shaders,
|
||||
&ctx->descs, xfb, dbg, so->xfb, 0,
|
||||
so->fixed_varying_mask);
|
||||
so->xfb->key.vs_is_xfb = true;
|
||||
|
||||
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs,
|
||||
so, &ctx->base.debug, so->xfb, 0);
|
||||
|
||||
/* Since transform feedback is handled via the transform
|
||||
* feedback program, the original program no longer uses XFB
|
||||
*/
|
||||
so->nir->info.has_transform_feedback_varyings = false;
|
||||
nir->info.has_transform_feedback_varyings = false;
|
||||
}
|
||||
|
||||
/* Compile the program. We don't use vertex shader keys, so there will
|
||||
@@ -401,14 +442,10 @@ panfrost_create_shader_state(
|
||||
}
|
||||
|
||||
static void
|
||||
panfrost_delete_shader_state(
|
||||
struct pipe_context *pctx,
|
||||
void *so)
|
||||
panfrost_delete_shader_state(struct pipe_context *pctx, void *so)
|
||||
{
|
||||
struct panfrost_uncompiled_shader *cso = (struct panfrost_uncompiled_shader *) so;
|
||||
|
||||
ralloc_free(cso->nir);
|
||||
|
||||
util_dynarray_foreach(&cso->variants, struct panfrost_compiled_shader, so) {
|
||||
panfrost_bo_unreference(so->bin.bo);
|
||||
panfrost_bo_unreference(so->state.bo);
|
||||
@@ -424,8 +461,7 @@ panfrost_delete_shader_state(
|
||||
|
||||
simple_mtx_destroy(&cso->lock);
|
||||
|
||||
util_dynarray_fini(&cso->variants);
|
||||
free(so);
|
||||
ralloc_free(so);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -438,15 +474,19 @@ panfrost_create_compute_state(
|
||||
const struct pipe_compute_state *cso)
|
||||
{
|
||||
struct panfrost_context *ctx = pan_context(pctx);
|
||||
struct panfrost_uncompiled_shader *so = panfrost_alloc_shader();
|
||||
struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(cso->prog);
|
||||
struct panfrost_compiled_shader *v = panfrost_alloc_variant(so);
|
||||
memset(v, 0, sizeof *v);
|
||||
|
||||
assert(cso->ir_type == PIPE_SHADER_IR_NIR && "TGSI kernels unsupported");
|
||||
|
||||
panfrost_shader_compile(pctx->screen, &ctx->shaders, &ctx->descs,
|
||||
cso->prog, &ctx->base.debug, v,
|
||||
cso->req_local_mem, 0);
|
||||
panfrost_shader_get(pctx->screen, &ctx->shaders, &ctx->descs,
|
||||
so, &ctx->base.debug, v, cso->req_local_mem);
|
||||
|
||||
/* The NIR becomes invalid after this. For compute kernels, we never
|
||||
* need to access it again. Don't keep a dangling pointer around.
|
||||
*/
|
||||
so->nir = NULL;
|
||||
|
||||
return so;
|
||||
}
|
||||
@@ -463,16 +503,6 @@ panfrost_bind_compute_state(struct pipe_context *pipe, void *cso)
|
||||
uncompiled ? util_dynarray_begin(&uncompiled->variants) : NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
panfrost_delete_compute_state(struct pipe_context *pipe, void *cso)
|
||||
{
|
||||
struct panfrost_uncompiled_shader *so =
|
||||
(struct panfrost_uncompiled_shader *)cso;
|
||||
|
||||
util_dynarray_fini(&so->variants);
|
||||
free(cso);
|
||||
}
|
||||
|
||||
void
|
||||
panfrost_shader_context_init(struct pipe_context *pctx)
|
||||
{
|
||||
@@ -486,5 +516,5 @@ panfrost_shader_context_init(struct pipe_context *pctx)
|
||||
|
||||
pctx->create_compute_state = panfrost_create_compute_state;
|
||||
pctx->bind_compute_state = panfrost_bind_compute_state;
|
||||
pctx->delete_compute_state = panfrost_delete_compute_state;
|
||||
pctx->delete_compute_state = panfrost_delete_shader_state;
|
||||
}
|
||||
|
Reference in New Issue
Block a user