iris: Implement 3DSTATE_SO_DECL_LIST

This commit is contained in:
Kenneth Graunke
2018-06-29 12:58:31 -07:00
parent 6794f1ffb9
commit 5c00f5fdca
4 changed files with 159 additions and 23 deletions

View File

@@ -93,6 +93,9 @@ struct blorp_params;
#define IRIS_DIRTY_BINDINGS_GS (1ull << 45) #define IRIS_DIRTY_BINDINGS_GS (1ull << 45)
#define IRIS_DIRTY_BINDINGS_FS (1ull << 46) #define IRIS_DIRTY_BINDINGS_FS (1ull << 46)
#define IRIS_DIRTY_BINDINGS_CS (1ull << 47) #define IRIS_DIRTY_BINDINGS_CS (1ull << 47)
#define IRIS_DIRTY_SO_BUFFERS (1ull << 48)
#define IRIS_DIRTY_SO_DECL_LIST (1ull << 49)
#define IRIS_DIRTY_STREAMOUT (1ull << 50)
struct iris_depth_stencil_alpha_state; struct iris_depth_stencil_alpha_state;
@@ -167,6 +170,11 @@ struct iris_compiled_shader {
/** The program data (owned by the program cache hash table) */ /** The program data (owned by the program cache hash table) */
struct brw_stage_prog_data *prog_data; struct brw_stage_prog_data *prog_data;
/**
* Derived 3DSTATE_SO_DECL_LIST packet (for transform feedback).
*/
uint32_t *so_decl_list;
/** /**
* Shader packets and other data derived from prog_data. These must be * Shader packets and other data derived from prog_data. These must be
* completely determined from prog_data. * completely determined from prog_data.
@@ -203,6 +211,8 @@ struct iris_vtable {
void (*store_derived_program_state)(const struct gen_device_info *devinfo, void (*store_derived_program_state)(const struct gen_device_info *devinfo,
enum iris_program_cache_id cache_id, enum iris_program_cache_id cache_id,
struct iris_compiled_shader *shader); struct iris_compiled_shader *shader);
uint32_t *(*create_so_decl_list)(const struct pipe_stream_output_info *sol,
const struct brw_vue_map *vue_map);
void (*populate_vs_key)(const struct iris_context *ice, void (*populate_vs_key)(const struct iris_context *ice,
struct brw_vs_prog_key *key); struct brw_vs_prog_key *key);
void (*populate_tcs_key)(const struct iris_context *ice, void (*populate_tcs_key)(const struct iris_context *ice,
@@ -274,6 +284,8 @@ struct iris_context {
unsigned num_samplers[MESA_SHADER_STAGES]; unsigned num_samplers[MESA_SHADER_STAGES];
unsigned num_textures[MESA_SHADER_STAGES]; unsigned num_textures[MESA_SHADER_STAGES];
uint32_t *so_decl_list;
struct iris_state_ref unbound_tex; struct iris_state_ref unbound_tex;
struct u_upload_mgr *surface_uploader; struct u_upload_mgr *surface_uploader;
@@ -379,7 +391,8 @@ void iris_upload_and_bind_shader(struct iris_context *ice,
enum iris_program_cache_id cache_id, enum iris_program_cache_id cache_id,
const void *key, const void *key,
const void *assembly, const void *assembly,
struct brw_stage_prog_data *prog_data); struct brw_stage_prog_data *prog_data,
uint32_t *so_decl_list);
const void *iris_find_previous_compile(const struct iris_context *ice, const void *iris_find_previous_compile(const struct iris_context *ice,
enum iris_program_cache_id cache_id, enum iris_program_cache_id cache_id,
unsigned program_string_id); unsigned program_string_id);

View File

@@ -67,6 +67,8 @@ iris_create_shader_state(struct pipe_context *ctx,
ish->program_id = get_new_program_id(screen); ish->program_id = get_new_program_id(screen);
ish->base.type = PIPE_SHADER_IR_NIR; ish->base.type = PIPE_SHADER_IR_NIR;
ish->base.ir.nir = nir; ish->base.ir.nir = nir;
memcpy(&ish->base.stream_output, &state->stream_output,
sizeof(struct pipe_stream_output_info));
return ish; return ish;
} }
@@ -280,7 +282,12 @@ iris_compile_vs(struct iris_context *ice,
iris_setup_push_uniform_range(compiler, prog_data); iris_setup_push_uniform_range(compiler, prog_data);
iris_upload_and_bind_shader(ice, IRIS_CACHE_VS, key, program, prog_data); uint32_t *so_decls =
ice->vtbl.create_so_decl_list(&ish->base.stream_output,
&vue_prog_data->vue_map);
iris_upload_and_bind_shader(ice, IRIS_CACHE_VS, key, program, prog_data,
so_decls);
ralloc_free(mem_ctx); ralloc_free(mem_ctx);
return true; return true;
@@ -343,7 +350,12 @@ iris_compile_tes(struct iris_context *ice,
iris_setup_push_uniform_range(compiler, prog_data); iris_setup_push_uniform_range(compiler, prog_data);
iris_upload_and_bind_shader(ice, IRIS_CACHE_TES, key, program, prog_data); uint32_t *so_decls =
ice->vtbl.create_so_decl_list(&ish->base.stream_output,
&vue_prog_data->vue_map);
iris_upload_and_bind_shader(ice, IRIS_CACHE_TES, key, program, prog_data,
so_decls);
ralloc_free(mem_ctx); ralloc_free(mem_ctx);
return true; return true;
@@ -405,7 +417,12 @@ iris_compile_gs(struct iris_context *ice,
iris_setup_push_uniform_range(compiler, prog_data); iris_setup_push_uniform_range(compiler, prog_data);
iris_upload_and_bind_shader(ice, IRIS_CACHE_GS, key, program, prog_data); uint32_t *so_decls =
ice->vtbl.create_so_decl_list(&ish->base.stream_output,
&vue_prog_data->vue_map);
iris_upload_and_bind_shader(ice, IRIS_CACHE_GS, key, program, prog_data,
so_decls);
ralloc_free(mem_ctx); ralloc_free(mem_ctx);
return true; return true;
@@ -468,7 +485,8 @@ iris_compile_fs(struct iris_context *ice,
iris_setup_push_uniform_range(compiler, prog_data); iris_setup_push_uniform_range(compiler, prog_data);
iris_upload_and_bind_shader(ice, IRIS_CACHE_FS, key, program, prog_data); iris_upload_and_bind_shader(ice, IRIS_CACHE_FS, key, program, prog_data,
NULL);
ralloc_free(mem_ctx); ralloc_free(mem_ctx);
return true; return true;
@@ -488,18 +506,22 @@ iris_update_compiled_fs(struct iris_context *ice)
ice->shaders.last_vue_map); ice->shaders.last_vue_map);
} }
static void static struct iris_compiled_shader *
update_last_vue_map(struct iris_context *ice) last_vue_shader(struct iris_context *ice)
{ {
struct brw_stage_prog_data *prog_data;
if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) if (ice->shaders.prog[MESA_SHADER_GEOMETRY])
prog_data = ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data; return ice->shaders.prog[MESA_SHADER_GEOMETRY];
else if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
prog_data = ice->shaders.prog[MESA_SHADER_TESS_EVAL]->prog_data;
else
prog_data = ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data;
if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
return ice->shaders.prog[MESA_SHADER_TESS_EVAL];
return ice->shaders.prog[MESA_SHADER_VERTEX];
}
static void
update_last_vue_map(struct iris_context *ice,
struct brw_stage_prog_data *prog_data)
{
struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
struct brw_vue_map *vue_map = &vue_prog_data->vue_map; struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
struct brw_vue_map *old_map = ice->shaders.last_vue_map; struct brw_vue_map *old_map = ice->shaders.last_vue_map;
@@ -553,7 +575,12 @@ iris_update_compiled_shaders(struct iris_context *ice)
if (dirty & IRIS_DIRTY_UNCOMPILED_GS) if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
iris_update_compiled_gs(ice); iris_update_compiled_gs(ice);
update_last_vue_map(ice); struct iris_compiled_shader *shader = last_vue_shader(ice);
update_last_vue_map(ice, shader->prog_data);
if (ice->state.so_decl_list != shader->so_decl_list) {
ice->state.so_decl_list = shader->so_decl_list;
ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST;
}
if (dirty & IRIS_DIRTY_UNCOMPILED_FS) if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
iris_update_compiled_fs(ice); iris_update_compiled_fs(ice);

View File

@@ -211,7 +211,8 @@ iris_upload_shader(struct iris_context *ice,
uint32_t key_size, uint32_t key_size,
const void *key, const void *key,
const void *assembly, const void *assembly,
struct brw_stage_prog_data *prog_data) struct brw_stage_prog_data *prog_data,
uint32_t *so_decl_list)
{ {
struct iris_screen *screen = (void *) ice->ctx.screen; struct iris_screen *screen = (void *) ice->ctx.screen;
struct gen_device_info *devinfo = &screen->devinfo; struct gen_device_info *devinfo = &screen->devinfo;
@@ -241,10 +242,12 @@ iris_upload_shader(struct iris_context *ice,
} }
shader->prog_data = prog_data; shader->prog_data = prog_data;
shader->so_decl_list = so_decl_list;
ralloc_steal(shader, shader->prog_data); ralloc_steal(shader, shader->prog_data);
ralloc_steal(shader->prog_data, prog_data->param); ralloc_steal(shader->prog_data, prog_data->param);
ralloc_steal(shader->prog_data, prog_data->pull_param); ralloc_steal(shader->prog_data, prog_data->pull_param);
ralloc_steal(shader, shader->so_decl_list);
/* Store the 3DSTATE shader packets and other derived state. */ /* Store the 3DSTATE shader packets and other derived state. */
ice->vtbl.store_derived_program_state(devinfo, cache_id, shader); ice->vtbl.store_derived_program_state(devinfo, cache_id, shader);
@@ -265,13 +268,14 @@ iris_upload_and_bind_shader(struct iris_context *ice,
enum iris_program_cache_id cache_id, enum iris_program_cache_id cache_id,
const void *key, const void *key,
const void *assembly, const void *assembly,
struct brw_stage_prog_data *prog_data) struct brw_stage_prog_data *prog_data,
uint32_t *so_decl_list)
{ {
assert(cache_id != IRIS_CACHE_BLORP); assert(cache_id != IRIS_CACHE_BLORP);
struct iris_compiled_shader *shader = struct iris_compiled_shader *shader =
iris_upload_shader(ice, cache_id, key_size_for_cache(cache_id), key, iris_upload_shader(ice, cache_id, key_size_for_cache(cache_id), key,
assembly, prog_data); assembly, prog_data, so_decl_list);
ice->shaders.prog[cache_id] = shader; ice->shaders.prog[cache_id] = shader;
ice->state.dirty |= dirty_flag_for_cache(cache_id); ice->state.dirty |= dirty_flag_for_cache(cache_id);
@@ -318,7 +322,7 @@ iris_blorp_upload_shader(struct blorp_batch *blorp_batch,
struct iris_compiled_shader *shader = struct iris_compiled_shader *shader =
iris_upload_shader(ice, IRIS_CACHE_BLORP, key_size, key, kernel, iris_upload_shader(ice, IRIS_CACHE_BLORP, key_size, key, kernel,
prog_data); prog_data, NULL);
struct iris_bo *bo = iris_resource_bo(shader->assembly.res); struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
*kernel_out = *kernel_out =

View File

@@ -1655,6 +1655,96 @@ iris_set_stream_output_targets(struct pipe_context *ctx,
{ {
} }
/**
 * Build a 3DSTATE_SO_DECL_LIST packet describing a shader's stream output
 * (transform feedback) layout.
 *
 * \param info     the stream output declarations to encode
 * \param vue_map  used to translate each output's register_index into the
 *                 slot number programmed into the SO_DECL
 *
 * \return NULL if there are no outputs (or the allocation fails); otherwise
 *         a ralloc'd array of (3 + 2 * max_decls) dwords holding the packed
 *         command.  The array has no ralloc parent, so the caller is
 *         responsible for it.
 */
static uint32_t *
iris_create_so_decl_list(const struct pipe_stream_output_info *info,
                         const struct brw_vue_map *vue_map)
{
   struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];
   int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
   int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
   int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
   int max_decls = 0;
   STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);

   if (info->num_outputs == 0)
      return NULL;

   memset(so_decl, 0, sizeof(so_decl));

   /* Construct the list of SO_DECLs to be emitted.  The formatting of the
    * command feels strange -- each dword pair contains a SO_DECL per stream.
    */
   for (unsigned i = 0; i < info->num_outputs; i++) {
      const struct pipe_stream_output *output = &info->output[i];
      const int buffer = output->output_buffer;
      const int varying = output->register_index;
      const unsigned stream_id = output->stream;
      assert(stream_id < MAX_VERTEX_STREAMS);

      buffer_mask[stream_id] |= 1 << buffer;

      assert(vue_map->varying_to_slot[varying] >= 0);

      /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
       * array.  Instead, it simply increments DstOffset for the following
       * input by the number of components that should be skipped.
       *
       * Our hardware is unusual in that it requires us to program SO_DECLs
       * for fake "hole" components, rather than simply taking the offset
       * for each real varying.  Each hole can have size 1, 2, 3, or 4; we
       * program as many size = 4 holes as we can, then a final hole to
       * accommodate the final 1, 2, or 3 remaining.
       */
      int skip_components = output->dst_offset - next_offset[buffer];

      while (skip_components > 0) {
         so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
            .HoleFlag = 1,
            .OutputBufferSlot = output->output_buffer,
            .ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
         };
         skip_components -= 4;
      }

      next_offset[buffer] = output->dst_offset + output->num_components;

      so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
         .OutputBufferSlot = output->output_buffer,
         .RegisterIndex = vue_map->varying_to_slot[varying],
         .ComponentMask =
            ((1 << output->num_components) - 1) << output->start_component,
      };

      if (decls[stream_id] > max_decls)
         max_decls = decls[stream_id];
   }

   /* The packet is a three-dword header (command dword, stream-to-buffer
    * selects, entry counts) followed by one two-dword SO_DECL_ENTRY per
    * declaration slot.
    */
   const unsigned header_dwords = 3;
   const unsigned total_dwords = header_dwords + 2 * max_decls;

   uint32_t *dw = ralloc_size(NULL, sizeof(uint32_t) * total_dwords);
   if (dw == NULL)
      return NULL;

   iris_pack_command(GENX(3DSTATE_SO_DECL_LIST), dw, list) {
      /* DWordLength excludes the first two dwords of the packet. */
      list.DWordLength = total_dwords - 2;
      list.StreamtoBufferSelects0 = buffer_mask[0];
      list.StreamtoBufferSelects1 = buffer_mask[1];
      list.StreamtoBufferSelects2 = buffer_mask[2];
      list.StreamtoBufferSelects3 = buffer_mask[3];
      list.NumEntries0 = decls[0];
      list.NumEntries1 = decls[1];
      list.NumEntries2 = decls[2];
      list.NumEntries3 = decls[3];
   }

   for (int i = 0; i < max_decls; i++) {
      /* Entries begin after the full header; starting any earlier would
       * overwrite the NumEntries dword and leave the tail of the packet
       * unwritten.
       */
      iris_pack_state(GENX(SO_DECL_ENTRY), dw + header_dwords + i * 2, entry) {
         entry.Stream0Decl = so_decl[0][i];
         entry.Stream1Decl = so_decl[1][i];
         entry.Stream2Decl = so_decl[2][i];
         entry.Stream3Decl = so_decl[3][i];
      }
   }

   return dw;
}
static void static void
iris_compute_sbe_urb_read_interval(uint64_t fs_input_slots, iris_compute_sbe_urb_read_interval(uint64_t fs_input_slots,
const struct brw_vue_map *last_vue_map, const struct brw_vue_map *last_vue_map,
@@ -2459,8 +2549,6 @@ iris_restore_context_saved_bos(struct iris_context *ice,
} }
} }
// XXX: 3DSTATE_SO_BUFFER
if (clean & IRIS_DIRTY_DEPTH_BUFFER) { if (clean & IRIS_DIRTY_DEPTH_BUFFER) {
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
@@ -2695,10 +2783,13 @@ iris_upload_render_state(struct iris_context *ice,
} }
} }
if ((dirty & IRIS_DIRTY_SO_DECL_LIST) && ice->state.so_decl_list) {
iris_batch_emit(batch, ice->state.so_decl_list,
4 * ((ice->state.so_decl_list[0] & 0xff) + 2));
}
// XXX: SOL: // XXX: SOL:
// 3DSTATE_STREAMOUT // 3DSTATE_STREAMOUT
// 3DSTATE_SO_BUFFER
// 3DSTATE_SO_DECL_LIST
if (dirty & IRIS_DIRTY_CLIP) { if (dirty & IRIS_DIRTY_CLIP) {
struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
@@ -3391,6 +3482,7 @@ genX(init_state)(struct iris_context *ice)
ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control; ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
ice->vtbl.derived_program_state_size = iris_derived_program_state_size; ice->vtbl.derived_program_state_size = iris_derived_program_state_size;
ice->vtbl.store_derived_program_state = iris_store_derived_program_state; ice->vtbl.store_derived_program_state = iris_store_derived_program_state;
ice->vtbl.create_so_decl_list = iris_create_so_decl_list;
ice->vtbl.populate_vs_key = iris_populate_vs_key; ice->vtbl.populate_vs_key = iris_populate_vs_key;
ice->vtbl.populate_tcs_key = iris_populate_tcs_key; ice->vtbl.populate_tcs_key = iris_populate_tcs_key;
ice->vtbl.populate_tes_key = iris_populate_tes_key; ice->vtbl.populate_tes_key = iris_populate_tes_key;