iris: streamout

This commit is contained in:
Kenneth Graunke
2018-07-11 12:45:19 -07:00
parent 059c096eff
commit cef0b8b13b
4 changed files with 88 additions and 23 deletions

View File

@@ -171,9 +171,10 @@ struct iris_compiled_shader {
struct brw_stage_prog_data *prog_data; struct brw_stage_prog_data *prog_data;
/** /**
* Derived 3DSTATE_SO_DECL_LIST packet (for transform feedback). * Derived 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets
* (the VUE-based information for transform feedback outputs).
*/ */
uint32_t *so_decl_list; uint32_t *streamout;
/** /**
* Shader packets and other data derived from prog_data. These must be * Shader packets and other data derived from prog_data. These must be
@@ -284,7 +285,9 @@ struct iris_context {
unsigned num_samplers[MESA_SHADER_STAGES]; unsigned num_samplers[MESA_SHADER_STAGES];
unsigned num_textures[MESA_SHADER_STAGES]; unsigned num_textures[MESA_SHADER_STAGES];
uint32_t *so_decl_list; /** 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets */
bool streamout_active;
uint32_t *streamout;
struct iris_state_ref unbound_tex; struct iris_state_ref unbound_tex;
@@ -392,7 +395,7 @@ void iris_upload_and_bind_shader(struct iris_context *ice,
const void *key, const void *key,
const void *assembly, const void *assembly,
struct brw_stage_prog_data *prog_data, struct brw_stage_prog_data *prog_data,
uint32_t *so_decl_list); uint32_t *streamout);
const void *iris_find_previous_compile(const struct iris_context *ice, const void *iris_find_previous_compile(const struct iris_context *ice,
enum iris_program_cache_id cache_id, enum iris_program_cache_id cache_id,
unsigned program_string_id); unsigned program_string_id);

View File

@@ -577,9 +577,9 @@ iris_update_compiled_shaders(struct iris_context *ice)
struct iris_compiled_shader *shader = last_vue_shader(ice); struct iris_compiled_shader *shader = last_vue_shader(ice);
update_last_vue_map(ice, shader->prog_data); update_last_vue_map(ice, shader->prog_data);
if (ice->state.so_decl_list != shader->so_decl_list) { if (ice->state.streamout != shader->streamout) {
ice->state.so_decl_list = shader->so_decl_list; ice->state.streamout = shader->streamout;
ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST; ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
} }
if (dirty & IRIS_DIRTY_UNCOMPILED_FS) if (dirty & IRIS_DIRTY_UNCOMPILED_FS)

View File

@@ -212,7 +212,7 @@ iris_upload_shader(struct iris_context *ice,
const void *key, const void *key,
const void *assembly, const void *assembly,
struct brw_stage_prog_data *prog_data, struct brw_stage_prog_data *prog_data,
uint32_t *so_decl_list) uint32_t *streamout)
{ {
struct iris_screen *screen = (void *) ice->ctx.screen; struct iris_screen *screen = (void *) ice->ctx.screen;
struct gen_device_info *devinfo = &screen->devinfo; struct gen_device_info *devinfo = &screen->devinfo;
@@ -242,12 +242,12 @@ iris_upload_shader(struct iris_context *ice,
} }
shader->prog_data = prog_data; shader->prog_data = prog_data;
shader->so_decl_list = so_decl_list; shader->streamout = streamout;
ralloc_steal(shader, shader->prog_data); ralloc_steal(shader, shader->prog_data);
ralloc_steal(shader->prog_data, prog_data->param); ralloc_steal(shader->prog_data, prog_data->param);
ralloc_steal(shader->prog_data, prog_data->pull_param); ralloc_steal(shader->prog_data, prog_data->pull_param);
ralloc_steal(shader, shader->so_decl_list); ralloc_steal(shader, shader->streamout);
/* Store the 3DSTATE shader packets and other derived state. */ /* Store the 3DSTATE shader packets and other derived state. */
ice->vtbl.store_derived_program_state(devinfo, cache_id, shader); ice->vtbl.store_derived_program_state(devinfo, cache_id, shader);
@@ -269,13 +269,13 @@ iris_upload_and_bind_shader(struct iris_context *ice,
const void *key, const void *key,
const void *assembly, const void *assembly,
struct brw_stage_prog_data *prog_data, struct brw_stage_prog_data *prog_data,
uint32_t *so_decl_list) uint32_t *streamout)
{ {
assert(cache_id != IRIS_CACHE_BLORP); assert(cache_id != IRIS_CACHE_BLORP);
struct iris_compiled_shader *shader = struct iris_compiled_shader *shader =
iris_upload_shader(ice, cache_id, key_size_for_cache(cache_id), key, iris_upload_shader(ice, cache_id, key_size_for_cache(cache_id), key,
assembly, prog_data, so_decl_list); assembly, prog_data, streamout);
ice->shaders.prog[cache_id] = shader; ice->shaders.prog[cache_id] = shader;
ice->state.dirty |= dirty_flag_for_cache(cache_id); ice->state.dirty |= dirty_flag_for_cache(cache_id);

View File

@@ -433,6 +433,7 @@ struct iris_genx_state {
struct iris_depth_buffer_state depth_buffer; struct iris_depth_buffer_state depth_buffer;
uint32_t so_buffers[4 * GENX(3DSTATE_SO_BUFFER_length)]; uint32_t so_buffers[4 * GENX(3DSTATE_SO_BUFFER_length)];
uint32_t streamout[4 * GENX(3DSTATE_STREAMOUT_length)];
}; };
static void static void
@@ -626,6 +627,7 @@ struct iris_rasterizer_state {
uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)]; uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)];
bool flatshade; /* for shader state */ bool flatshade; /* for shader state */
bool flatshade_first; /* for stream output */
bool clamp_fragment_color; /* for shader state */ bool clamp_fragment_color; /* for shader state */
bool light_twoside; /* for shader state */ bool light_twoside; /* for shader state */
bool rasterizer_discard; /* for 3DSTATE_STREAMOUT */ bool rasterizer_discard; /* for 3DSTATE_STREAMOUT */
@@ -657,6 +659,7 @@ iris_create_rasterizer_state(struct pipe_context *ctx,
#endif #endif
cso->flatshade = state->flatshade; cso->flatshade = state->flatshade;
cso->flatshade_first = state->flatshade_first;
cso->clamp_fragment_color = state->clamp_fragment_color; cso->clamp_fragment_color = state->clamp_fragment_color;
cso->light_twoside = state->light_twoside; cso->light_twoside = state->light_twoside;
cso->rasterizer_discard = state->rasterizer_discard; cso->rasterizer_discard = state->rasterizer_discard;
@@ -774,7 +777,7 @@ iris_bind_rasterizer_state(struct pipe_context *ctx, void *state)
if (cso_changed(line_stipple_enable) || cso_changed(poly_stipple_enable)) if (cso_changed(line_stipple_enable) || cso_changed(poly_stipple_enable))
ice->state.dirty |= IRIS_DIRTY_WM; ice->state.dirty |= IRIS_DIRTY_WM;
if (cso_changed(rasterizer_discard)) if (cso_changed(rasterizer_discard) || cso_changed(flatshade_first))
ice->state.dirty |= IRIS_DIRTY_STREAMOUT; ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
} }
@@ -1696,7 +1699,18 @@ iris_set_stream_output_targets(struct pipe_context *ctx,
const unsigned *offsets) const unsigned *offsets)
{ {
struct iris_context *ice = (struct iris_context *) ctx; struct iris_context *ice = (struct iris_context *) ctx;
uint32_t *so_buffers = ice->state.genx->so_buffers; struct iris_genx_state *genx = ice->state.genx;
uint32_t *so_buffers = genx->so_buffers;
const bool active = num_targets > 0;
if (ice->state.streamout_active != active) {
ice->state.streamout_active = active;
ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
}
/* No need to update 3DSTATE_SO_BUFFER unless SOL is active. */
if (!active)
return;
for (unsigned i = 0; i < 4; i++, for (unsigned i = 0; i < 4; i++,
so_buffers += GENX(3DSTATE_SO_BUFFER_length)) { so_buffers += GENX(3DSTATE_SO_BUFFER_length)) {
@@ -1792,9 +1806,36 @@ iris_create_so_decl_list(const struct pipe_stream_output_info *info,
max_decls = decls[stream_id]; max_decls = decls[stream_id];
} }
uint32_t *dw = ralloc_size(NULL, sizeof(uint32_t) * (3 + 2 * max_decls)); unsigned dwords = GENX(3DSTATE_STREAMOUT_length) + (3 + 2 * max_decls);
uint32_t *map = ralloc_size(NULL, sizeof(uint32_t) * dwords);
uint32_t *so_decl_map = map + GENX(3DSTATE_STREAMOUT_length);
iris_pack_command(GENX(3DSTATE_SO_DECL_LIST), dw, list) { iris_pack_command(GENX(3DSTATE_STREAMOUT), map, sol) {
int urb_entry_read_offset = 0;
int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
urb_entry_read_offset;
/* We always read the whole vertex. This could be reduced at some
* point by reading less and offsetting the register index in the
* SO_DECLs.
*/
sol.Stream0VertexReadOffset = urb_entry_read_offset;
sol.Stream0VertexReadLength = urb_entry_read_length - 1;
sol.Stream1VertexReadOffset = urb_entry_read_offset;
sol.Stream1VertexReadLength = urb_entry_read_length - 1;
sol.Stream2VertexReadOffset = urb_entry_read_offset;
sol.Stream2VertexReadLength = urb_entry_read_length - 1;
sol.Stream3VertexReadOffset = urb_entry_read_offset;
sol.Stream3VertexReadLength = urb_entry_read_length - 1;
/* Set buffer pitches; 0 means unbound. */
sol.Buffer0SurfacePitch = 4 * info->stride[0];
sol.Buffer1SurfacePitch = 4 * info->stride[1];
sol.Buffer2SurfacePitch = 4 * info->stride[2];
sol.Buffer3SurfacePitch = 4 * info->stride[3];
}
iris_pack_command(GENX(3DSTATE_SO_DECL_LIST), so_decl_map, list) {
list.DWordLength = 3 + 2 * max_decls - 2; list.DWordLength = 3 + 2 * max_decls - 2;
list.StreamtoBufferSelects0 = buffer_mask[0]; list.StreamtoBufferSelects0 = buffer_mask[0];
list.StreamtoBufferSelects1 = buffer_mask[1]; list.StreamtoBufferSelects1 = buffer_mask[1];
@@ -1807,7 +1848,7 @@ iris_create_so_decl_list(const struct pipe_stream_output_info *info,
} }
for (int i = 0; i < max_decls; i++) { for (int i = 0; i < max_decls; i++) {
iris_pack_state(GENX(SO_DECL_ENTRY), dw + 2 + i * 2, entry) { iris_pack_state(GENX(SO_DECL_ENTRY), so_decl_map + 2 + i * 2, entry) {
entry.Stream0Decl = so_decl[0][i]; entry.Stream0Decl = so_decl[0][i];
entry.Stream1Decl = so_decl[1][i]; entry.Stream1Decl = so_decl[1][i];
entry.Stream2Decl = so_decl[2][i]; entry.Stream2Decl = so_decl[2][i];
@@ -1815,7 +1856,7 @@ iris_create_so_decl_list(const struct pipe_stream_output_info *info,
} }
} }
return dw; return map;
} }
static void static void
@@ -2862,13 +2903,34 @@ iris_upload_render_state(struct iris_context *ice,
4 * 4 * GENX(3DSTATE_SO_BUFFER_length)); 4 * 4 * GENX(3DSTATE_SO_BUFFER_length));
} }
if ((dirty & IRIS_DIRTY_SO_DECL_LIST) && ice->state.so_decl_list) { if ((dirty & IRIS_DIRTY_SO_DECL_LIST) && ice->state.streamout) {
iris_batch_emit(batch, ice->state.so_decl_list, uint32_t *decl_list =
4 * ((ice->state.so_decl_list[0] & 0xff) + 2)); ice->state.streamout + GENX(3DSTATE_STREAMOUT_length);
iris_batch_emit(batch, decl_list, 4 * ((decl_list[0] & 0xff) + 2));
} }
// XXX: SOL: if (dirty & IRIS_DIRTY_STREAMOUT) {
// 3DSTATE_STREAMOUT const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
if (!ice->state.streamout_active) {
iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), sol);
} else {
uint32_t dynamic_sol[GENX(3DSTATE_STREAMOUT_length)];
iris_pack_command(GENX(3DSTATE_STREAMOUT), dynamic_sol, sol) {
sol.SOFunctionEnable = true;
sol.SOStatisticsEnable = true;
// XXX: GL_PRIMITIVES_GENERATED query
sol.RenderingDisable = cso_rast->rasterizer_discard;
sol.ReorderMode = cso_rast->flatshade_first ? LEADING : TRAILING;
}
assert(ice->state.streamout);
iris_emit_merge(batch, ice->state.streamout, dynamic_sol,
GENX(3DSTATE_STREAMOUT_length));
}
}
if (dirty & IRIS_DIRTY_CLIP) { if (dirty & IRIS_DIRTY_CLIP) {
struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;