iris: copy over i965's cache tracking

Needed to split out a vtbl so that PIPE_CONTROLs can be emitted without an ice (iris_context) pointer.
Author: Kenneth Graunke
Date:   2018-04-20 23:28:03 -07:00
Parent: dbd4770397
Commit: 60d708bb80

9 changed files with 233 additions and 65 deletions
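For context: each iris_batch now owns the render-cache map and depth-cache set that i965 kept on brw_context, and code that only has a batch reaches the generation-specific PIPE_CONTROL emitter through batch->vtbl. A minimal sketch of how the new tracking API is meant to be driven; bind_render_target() and sample_from() are illustrative names, not part of this commit (the real call sites land in later commits):

/* Hypothetical call sites for the cache-tracking API added below. */
static void
bind_render_target(struct iris_batch *batch, struct iris_bo *bo,
                   enum isl_format format, enum isl_aux_usage aux_usage)
{
   /* Flush first if the BO is tracked as a depth buffer, or sits in the
    * render cache with a different (format, aux_usage) pair. */
   iris_cache_flush_for_render(batch, bo, format, aux_usage);
   /* ...emit SURFACE_STATE and render target packets... */
   iris_render_cache_add_bo(batch, bo, format, aux_usage);
}

static void
sample_from(struct iris_batch *batch, struct iris_bo *bo)
{
   /* Render-to-texture within a single batch: the kernel only guarantees
    * coherency between batches, so flush/invalidate before sampling a BO
    * written through the render or depth cache earlier in this batch. */
   iris_cache_flush_for_read(batch, bo);
   /* ...emit sampler/surface state for the texture... */
}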

src/gallium/drivers/iris/iris_batch.c

@@ -29,6 +29,7 @@
 #include "drm-uapi/i915_drm.h"
 #include "util/hash_table.h"
+#include "util/set.h"
 #include "main/macros.h"
 #include <errno.h>
@@ -123,10 +124,12 @@ create_batch_buffer(struct iris_bufmgr *bufmgr,
 void
 iris_init_batch(struct iris_batch *batch,
                 struct iris_screen *screen,
+                struct iris_vtable *vtbl,
                 struct pipe_debug_callback *dbg,
                 uint8_t ring)
 {
    batch->screen = screen;
+   batch->vtbl = vtbl;
    batch->dbg = dbg;
 
    /* ring should be one of I915_EXEC_RENDER, I915_EXEC_BLT, etc. */
@@ -141,6 +144,10 @@ iris_init_batch(struct iris_batch *batch,
    batch->validation_list =
       malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));
 
+   batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                                 _mesa_key_pointer_equal);
+   batch->cache.depth = _mesa_set_create(NULL, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    if (unlikely(INTEL_DEBUG)) {
       batch->state_sizes =
         _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);
@@ -223,10 +230,10 @@ iris_batch_reset(struct iris_batch *batch)
 }
 
 static void
-iris_batch_reset_and_clear_render_cache(struct iris_batch *batch)
+iris_batch_reset_and_clear_caches(struct iris_batch *batch)
 {
    iris_batch_reset(batch);
-   // XXX: iris_render_cache_set_clear(batch);
+   iris_cache_sets_clear(batch);
 }
 
 static void
@@ -250,6 +257,9 @@ iris_batch_free(struct iris_batch *batch)
    iris_bo_unreference(batch->last_cmd_bo);
 
+   _mesa_hash_table_destroy(batch->cache.render, NULL);
+   _mesa_set_destroy(batch->cache.depth, NULL);
+
    if (batch->state_sizes) {
       _mesa_hash_table_destroy(batch->state_sizes, NULL);
       gen_batch_decode_ctx_finish(&batch->decoder);
@@ -581,7 +591,7 @@ _iris_batch_flush_fence(struct iris_batch *batch,
    batch->aperture_space = 0;
 
    /* Start a new batch buffer. */
-   iris_batch_reset_and_clear_render_cache(batch);
+   iris_batch_reset_and_clear_caches(batch);
 
    return 0;
 }

src/gallium/drivers/iris/iris_batch.h

@@ -48,6 +48,7 @@ struct iris_batch_buffer {
 struct iris_batch {
    struct iris_screen *screen;
+   struct iris_vtable *vtbl;
    struct pipe_debug_callback *dbg;
 
    /** Current batchbuffer being queued up. */
@@ -72,6 +73,22 @@ struct iris_batch {
    /** The amount of aperture space (in bytes) used by all exec_bos */
    int aperture_space;
 
+   struct {
+      /**
+       * Set of struct iris_bo * that have been rendered to within this
+       * batchbuffer and would need flushing before being used from another
+       * cache domain that isn't coherent with it (i.e. the sampler).
+       */
+      struct hash_table *render;
+
+      /**
+       * Set of struct iris_bo * that have been used as a depth buffer within
+       * this batchbuffer and would need flushing before being used from
+       * another cache domain that isn't coherent with it (i.e. the sampler).
+       */
+      struct set *depth;
+   } cache;
+
 #if DEBUG
    /** Map from batch offset to iris_alloc_state data (with DEBUG_BATCH) */
    // XXX: unused
@@ -82,6 +99,7 @@ struct iris_batch {
 void iris_init_batch(struct iris_batch *batch,
                      struct iris_screen *screen,
+                     struct iris_vtable *vtbl,
                      struct pipe_debug_callback *dbg,
                      uint8_t ring);
 void iris_batch_free(struct iris_batch *batch);

src/gallium/drivers/iris/iris_context.c

@@ -147,7 +147,8 @@ iris_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags)
                        IRIS_RESOURCE_FLAG_DYNAMIC_MEMZONE);
 
    genX_call(devinfo, init_state, ice);
-   ice->state.init_render_context(screen, &ice->render_batch, &ice->dbg);
+   ice->vtbl.init_render_context(screen, &ice->render_batch, &ice->vtbl,
+                                 &ice->dbg);
 
    return ctx;
 }

src/gallium/drivers/iris/iris_context.h

@@ -33,6 +33,7 @@
 #include "iris_screen.h"
 
 struct iris_bo;
+struct iris_context;
 
 #define IRIS_RESOURCE_FLAG_SHADER_MEMZONE  (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
 #define IRIS_RESOURCE_FLAG_SURFACE_MEMZONE (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
@@ -195,11 +196,41 @@ struct iris_shader_state {
    unsigned const_size;
 };
 
+struct iris_vtable {
+   void (*destroy_state)(struct iris_context *ice);
+   void (*init_render_context)(struct iris_screen *screen,
+                               struct iris_batch *batch,
+                               struct iris_vtable *vtbl,
+                               struct pipe_debug_callback *dbg);
+   void (*upload_render_state)(struct iris_context *ice,
+                               struct iris_batch *batch,
+                               const struct pipe_draw_info *draw);
+   void (*emit_raw_pipe_control)(struct iris_batch *batch, uint32_t flags,
+                                 struct iris_bo *bo, uint32_t offset,
+                                 uint64_t imm);
+   unsigned (*derived_program_state_size)(enum iris_program_cache_id id);
+   void (*set_derived_program_state)(const struct gen_device_info *devinfo,
+                                     enum iris_program_cache_id cache_id,
+                                     struct iris_compiled_shader *shader);
+   void (*populate_vs_key)(const struct iris_context *ice,
+                           struct brw_vs_prog_key *key);
+   void (*populate_tcs_key)(const struct iris_context *ice,
+                            struct brw_tcs_prog_key *key);
+   void (*populate_tes_key)(const struct iris_context *ice,
+                            struct brw_tes_prog_key *key);
+   void (*populate_gs_key)(const struct iris_context *ice,
+                           struct brw_gs_prog_key *key);
+   void (*populate_fs_key)(const struct iris_context *ice,
+                           struct brw_wm_prog_key *key);
+};
+
 struct iris_context {
    struct pipe_context ctx;
 
    struct pipe_debug_callback dbg;
 
+   struct iris_vtable vtbl;
+
    struct {
       struct iris_uncompiled_shader *uncompiled[MESA_SHADER_STAGES];
      struct iris_compiled_shader *prog[MESA_SHADER_STAGES];
@@ -247,30 +278,6 @@ struct iris_context {
       // "I'm streaming this out at draw time and never want it again!"
       struct u_upload_mgr *dynamic_uploader;
 
-      void (*destroy_state)(struct iris_context *ice);
-      void (*init_render_context)(struct iris_screen *screen,
-                                  struct iris_batch *batch,
-                                  struct pipe_debug_callback *dbg);
-      void (*upload_render_state)(struct iris_context *ice,
-                                  struct iris_batch *batch,
-                                  const struct pipe_draw_info *draw);
-      void (*emit_raw_pipe_control)(struct iris_batch *batch, uint32_t flags,
-                                    struct iris_bo *bo, uint32_t offset,
-                                    uint64_t imm);
-      unsigned (*derived_program_state_size)(enum iris_program_cache_id id);
-      void (*set_derived_program_state)(const struct gen_device_info *devinfo,
-                                        enum iris_program_cache_id cache_id,
-                                        struct iris_compiled_shader *shader);
-      void (*populate_vs_key)(const struct iris_context *ice,
-                              struct brw_vs_prog_key *key);
-      void (*populate_tcs_key)(const struct iris_context *ice,
-                               struct brw_tcs_prog_key *key);
-      void (*populate_tes_key)(const struct iris_context *ice,
-                               struct brw_tes_prog_key *key);
-      void (*populate_gs_key)(const struct iris_context *ice,
-                              struct brw_gs_prog_key *key);
-      void (*populate_fs_key)(const struct iris_context *ice,
-                              struct brw_wm_prog_key *key);
    } state;
 };
@@ -299,17 +306,27 @@ void iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
 
 /* iris_pipe_control.c */
 
-void iris_emit_pipe_control_flush(struct iris_context *ice,
-                                  struct iris_batch *batch,
+void iris_emit_pipe_control_flush(struct iris_batch *batch,
                                   uint32_t flags);
-void iris_emit_pipe_control_write(struct iris_context *ice,
-                                  struct iris_batch *batch, uint32_t flags,
+void iris_emit_pipe_control_write(struct iris_batch *batch, uint32_t flags,
                                   struct iris_bo *bo, uint32_t offset,
                                   uint64_t imm);
-void iris_emit_end_of_pipe_sync(struct iris_context *ice,
-                                struct iris_batch *batch,
+void iris_emit_end_of_pipe_sync(struct iris_batch *batch,
                                 uint32_t flags);
 
+void iris_cache_sets_clear(struct iris_batch *batch);
+void iris_cache_flush_for_read(struct iris_batch *batch, struct iris_bo *bo);
+void iris_cache_flush_for_render(struct iris_batch *batch,
+                                 struct iris_bo *bo,
+                                 enum isl_format format,
+                                 enum isl_aux_usage aux_usage);
+void iris_render_cache_add_bo(struct iris_batch *batch,
+                              struct iris_bo *bo,
+                              enum isl_format format,
+                              enum isl_aux_usage aux_usage);
+void iris_cache_flush_for_depth(struct iris_batch *batch, struct iris_bo *bo);
+void iris_depth_cache_add_bo(struct iris_batch *batch, struct iris_bo *bo);
+
 /* iris_state.c */
 
 void gen9_init_state(struct iris_context *ice);
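The vtbl indirection mirrors the genX_call() dispatch seen in iris_create_context() above: each generation's init_state fills the same function-pointer table once at context creation, and everything afterwards goes through the pointers. A sketch of that dispatch, assuming a second generation were wired up the same way (gen10_init_state is hypothetical; only gen9_init_state is declared above):

/* Generation dispatch happens once; afterwards common code uses
 * ice->vtbl (and batch->vtbl for batch-only code like pipe controls). */
static void
init_state_for_gen(const struct gen_device_info *devinfo,
                   struct iris_context *ice)
{
   switch (devinfo->gen) {
   case 9:
      gen9_init_state(ice);   /* fills every ice->vtbl.* pointer */
      break;
   /* case 10: gen10_init_state(ice); break;  -- hypothetical */
   default:
      unreachable("unsupported generation");
   }
}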

src/gallium/drivers/iris/iris_draw.c

@@ -37,5 +37,5 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
    struct iris_context *ice = (struct iris_context *) ctx;
 
    iris_update_compiled_shaders(ice);
-   ice->state.upload_render_state(ice, &ice->render_batch, info);
+   ice->vtbl.upload_render_state(ice, &ice->render_batch, info);
 }

src/gallium/drivers/iris/iris_pipe_control.c

@@ -22,6 +22,8 @@
  */
 
 #include "iris_context.h"
+#include "util/hash_table.h"
+#include "util/set.h"
 
 /**
  * Emit a PIPE_CONTROL with various flushing flags.
@@ -30,9 +32,7 @@
  * given generation.
  */
 void
-iris_emit_pipe_control_flush(struct iris_context *ice,
-                             struct iris_batch *batch,
-                             uint32_t flags)
+iris_emit_pipe_control_flush(struct iris_batch *batch, uint32_t flags)
 {
    if ((flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
        (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
@@ -47,12 +47,11 @@ iris_emit_pipe_control_flush(struct iris_context *ice,
        * with any write cache flush, so this shouldn't be a concern. In order
        * to ensure a full stall, we do an end-of-pipe sync.
        */
-      iris_emit_end_of_pipe_sync(ice, batch,
-                                 flags & PIPE_CONTROL_CACHE_FLUSH_BITS);
+      iris_emit_end_of_pipe_sync(batch, flags & PIPE_CONTROL_CACHE_FLUSH_BITS);
       flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
    }
 
-   ice->state.emit_raw_pipe_control(batch, flags, NULL, 0, 0);
+   batch->vtbl->emit_raw_pipe_control(batch, flags, NULL, 0, 0);
 }
 
 /**
@@ -64,12 +63,11 @@ iris_emit_pipe_control_flush(struct iris_context *ice,
  *  - PIPE_CONTROL_WRITE_DEPTH_COUNT
  */
 void
-iris_emit_pipe_control_write(struct iris_context *ice,
-                             struct iris_batch *batch, uint32_t flags,
+iris_emit_pipe_control_write(struct iris_batch *batch, uint32_t flags,
                              struct iris_bo *bo, uint32_t offset,
                              uint64_t imm)
 {
-   ice->state.emit_raw_pipe_control(batch, flags, bo, offset, imm);
+   batch->vtbl->emit_raw_pipe_control(batch, flags, bo, offset, imm);
 }
 
 /*
@@ -95,9 +93,7 @@ iris_emit_pipe_control_write(struct iris_context *ice,
  * Data" in the PIPE_CONTROL command.
  */
 void
-iris_emit_end_of_pipe_sync(struct iris_context *ice,
-                           struct iris_batch *batch,
-                           uint32_t flags)
+iris_emit_end_of_pipe_sync(struct iris_batch *batch, uint32_t flags)
 {
    /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
     *
@@ -121,7 +117,132 @@ iris_emit_end_of_pipe_sync(struct iris_context *ice,
     *    Data, Required Write Cache Flush bits set)
     *  - Workload-2 (Can use the data produce or output by Workload-1)
     */
-   iris_emit_pipe_control_write(ice, batch, flags | PIPE_CONTROL_CS_STALL |
+   iris_emit_pipe_control_write(batch, flags | PIPE_CONTROL_CS_STALL |
                                 PIPE_CONTROL_WRITE_IMMEDIATE,
                                 batch->screen->workaround_bo, 0, 0);
 }
+
+void
+iris_cache_sets_clear(struct iris_batch *batch)
+{
+   struct hash_entry *render_entry;
+   hash_table_foreach(batch->cache.render, render_entry)
+      _mesa_hash_table_remove(batch->cache.render, render_entry);
+
+   struct set_entry *depth_entry;
+   set_foreach(batch->cache.depth, depth_entry)
+      _mesa_set_remove(batch->cache.depth, depth_entry);
+}
+
+/**
+ * Emits an appropriate flush for a BO if it has been rendered to within the
+ * same batchbuffer as a read that's about to be emitted.
+ *
+ * The GPU has separate, incoherent caches for the render cache and the
+ * sampler cache, along with other caches. Usually data in the different
+ * caches don't interact (e.g. we don't render to our driver-generated
+ * immediate constant data), but for render-to-texture in FBOs we definitely
+ * do. When a batchbuffer is flushed, the kernel will ensure that everything
+ * necessary is flushed before another use of that BO, but for reuse from
+ * different caches within a batchbuffer, it's all our responsibility.
+ */
+static void
+flush_depth_and_render_caches(struct iris_batch *batch, struct iris_bo *bo)
+{
+   iris_emit_pipe_control_flush(batch,
+                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                                PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                                PIPE_CONTROL_CS_STALL);
+
+   iris_emit_pipe_control_flush(batch,
+                                PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+                                PIPE_CONTROL_CONST_CACHE_INVALIDATE);
+
+   iris_cache_sets_clear(batch);
+}
+
+void
+iris_cache_flush_for_read(struct iris_batch *batch,
+                          struct iris_bo *bo)
+{
+   if (_mesa_hash_table_search(batch->cache.render, bo) ||
+       _mesa_set_search(batch->cache.depth, bo))
+      flush_depth_and_render_caches(batch, bo);
+}
+
+static void *
+format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage)
+{
+   return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage);
+}
+
+void
+iris_cache_flush_for_render(struct iris_batch *batch,
+                            struct iris_bo *bo,
+                            enum isl_format format,
+                            enum isl_aux_usage aux_usage)
+{
+   if (_mesa_set_search(batch->cache.depth, bo))
+      flush_depth_and_render_caches(batch, bo);
+
+   /* Check to see if this bo has been used by a previous rendering operation
+    * but with a different format or aux usage. If it has, flush the render
+    * cache so we ensure that it's only in there with one format or aux usage
+    * at a time.
+    *
+    * Even though it's not obvious, this can easily happen in practice.
+    * Suppose a client is blending on a surface with sRGB encode enabled on
+    * gen9. This implies that you get AUX_USAGE_CCS_D at best. If the client
+    * then disables sRGB decode and continues blending we will flip on
+    * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
+    * perfectly valid since CCS_E is a subset of CCS_D). However, this means
+    * that we have fragments in-flight which are rendering with UNORM+CCS_E
+    * and other fragments in-flight with SRGB+CCS_D on the same surface at the
+    * same time and the pixel scoreboard and color blender are trying to sort
+    * it all out. This ends badly (i.e. GPU hangs).
+    *
+    * To date, we have never observed GPU hangs or even corruption to be
+    * associated with switching the format, only the aux usage. However,
+    * there are comments in various docs which indicate that the render cache
+    * isn't 100% resilient to format changes. We may as well be conservative
+    * and flush on format changes too. We can always relax this later if we
+    * find it to be a performance problem.
+    */
+   struct hash_entry *entry = _mesa_hash_table_search(batch->cache.render, bo);
+   if (entry && entry->data != format_aux_tuple(format, aux_usage))
+      flush_depth_and_render_caches(batch, bo);
+}
+
+void
+iris_render_cache_add_bo(struct iris_batch *batch,
+                         struct iris_bo *bo,
+                         enum isl_format format,
+                         enum isl_aux_usage aux_usage)
+{
+#ifndef NDEBUG
+   struct hash_entry *entry = _mesa_hash_table_search(batch->cache.render, bo);
+   if (entry) {
+      /* Otherwise, someone didn't do a flush_for_render and that would be
+       * very bad indeed.
+       */
+      assert(entry->data == format_aux_tuple(format, aux_usage));
+   }
+#endif
+
+   _mesa_hash_table_insert(batch->cache.render, bo,
+                           format_aux_tuple(format, aux_usage));
+}
+
+void
+iris_cache_flush_for_depth(struct iris_batch *batch,
+                           struct iris_bo *bo)
+{
+   if (_mesa_hash_table_search(batch->cache.render, bo))
+      flush_depth_and_render_caches(batch, bo);
+}
+
+void
+iris_depth_cache_add_bo(struct iris_batch *batch, struct iris_bo *bo)
+{
+   _mesa_set_add(batch->cache.depth, bo);
+}
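A note on format_aux_tuple(): it packs the (format, aux usage) pair directly into the hash table's value pointer, so the render-cache map needs no per-BO allocation, and two entries compare equal only when both fields match. A self-contained illustration of that packing (the numeric values below are arbitrary stand-ins, not real isl enum values):

#include <assert.h>
#include <stdint.h>

/* Same packing as format_aux_tuple() above: format in bits 8 and up,
 * aux usage in the low 8 bits (isl_aux_usage values fit in a byte). */
static void *
pack_format_aux(uint32_t format, uint32_t aux_usage)
{
   return (void *)(uintptr_t)(format << 8 | aux_usage);
}

int
main(void)
{
   assert(pack_format_aux(200, 3) == pack_format_aux(200, 3));
   assert(pack_format_aux(200, 3) != pack_format_aux(200, 4)); /* aux differs */
   assert(pack_format_aux(200, 3) != pack_format_aux(201, 3)); /* format differs */
   return 0;
}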

src/gallium/drivers/iris/iris_program.c

@@ -278,7 +278,7 @@ static void
 iris_update_compiled_vs(struct iris_context *ice)
 {
    struct brw_vs_prog_key key;
-   ice->state.populate_vs_key(ice, &key);
+   ice->vtbl.populate_vs_key(ice, &key);
 
    if (iris_bind_cached_shader(ice, IRIS_CACHE_VS, &key))
       return;
@@ -344,7 +344,7 @@ iris_update_compiled_tes(struct iris_context *ice)
       return;
 
    struct brw_tes_prog_key key;
-   ice->state.populate_tes_key(ice, &key);
+   ice->vtbl.populate_tes_key(ice, &key);
 
    if (iris_bind_cached_shader(ice, IRIS_CACHE_TES, &key))
       return;
@@ -404,7 +404,7 @@ static void
 iris_update_compiled_fs(struct iris_context *ice)
 {
    struct brw_wm_prog_key key;
-   ice->state.populate_fs_key(ice, &key);
+   ice->vtbl.populate_fs_key(ice, &key);
 
    if (iris_bind_cached_shader(ice, IRIS_CACHE_FS, &key))
      return;

src/gallium/drivers/iris/iris_program_cache.c

@@ -193,7 +193,7 @@ iris_upload_and_bind_shader(struct iris_context *ice,
    struct hash_table *cache = ice->shaders.cache;
    struct iris_compiled_shader *shader =
       ralloc_size(cache, sizeof(struct iris_compiled_shader) +
-                  ice->state.derived_program_state_size(cache_id));
+                  ice->vtbl.derived_program_state_size(cache_id));
    const struct iris_compiled_shader *existing =
       find_existing_assembly(cache, assembly, prog_data->program_size);
@@ -219,7 +219,7 @@ iris_upload_and_bind_shader(struct iris_context *ice,
    ralloc_steal(shader->prog_data, prog_data->pull_param);
 
    /* Store the 3DSTATE shader packets and other derived state. */
-   ice->state.set_derived_program_state(devinfo, cache_id, shader);
+   ice->vtbl.set_derived_program_state(devinfo, cache_id, shader);
 
    struct keybox *keybox = make_keybox(cache, cache_id, key);
    _mesa_hash_table_insert(ice->shaders.cache, keybox, shader);

src/gallium/drivers/iris/iris_state.c

@@ -332,9 +332,10 @@ emit_state(struct iris_batch *batch,
 static void
 iris_init_render_context(struct iris_screen *screen,
                          struct iris_batch *batch,
+                         struct iris_vtable *vtbl,
                          struct pipe_debug_callback *dbg)
 {
-   iris_init_batch(batch, screen, dbg, I915_EXEC_RENDER);
+   iris_init_batch(batch, screen, vtbl, dbg, I915_EXEC_RENDER);
 
    /* XXX: PIPE_CONTROLs */
@@ -2912,17 +2913,17 @@ genX(init_state)(struct iris_context *ice)
    ctx->stream_output_target_destroy = iris_stream_output_target_destroy;
    ctx->set_stream_output_targets = iris_set_stream_output_targets;
 
-   ice->state.destroy_state = iris_destroy_state;
-   ice->state.init_render_context = iris_init_render_context;
-   ice->state.upload_render_state = iris_upload_render_state;
-   ice->state.emit_raw_pipe_control = iris_emit_raw_pipe_control;
-   ice->state.derived_program_state_size = iris_derived_program_state_size;
-   ice->state.set_derived_program_state = iris_set_derived_program_state;
-   ice->state.populate_vs_key = iris_populate_vs_key;
-   ice->state.populate_tcs_key = iris_populate_tcs_key;
-   ice->state.populate_tes_key = iris_populate_tes_key;
-   ice->state.populate_gs_key = iris_populate_gs_key;
-   ice->state.populate_fs_key = iris_populate_fs_key;
+   ice->vtbl.destroy_state = iris_destroy_state;
+   ice->vtbl.init_render_context = iris_init_render_context;
+   ice->vtbl.upload_render_state = iris_upload_render_state;
+   ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
+   ice->vtbl.derived_program_state_size = iris_derived_program_state_size;
+   ice->vtbl.set_derived_program_state = iris_set_derived_program_state;
+   ice->vtbl.populate_vs_key = iris_populate_vs_key;
+   ice->vtbl.populate_tcs_key = iris_populate_tcs_key;
+   ice->vtbl.populate_tes_key = iris_populate_tes_key;
+   ice->vtbl.populate_gs_key = iris_populate_gs_key;
+   ice->vtbl.populate_fs_key = iris_populate_fs_key;
 
    ice->state.dirty = ~0ull;
 }