iris: little bits of compute basics

This commit is contained in:
Kenneth Graunke
2018-07-26 21:59:20 -07:00
parent 860ce6af3f
commit 9fc672428d
9 changed files with 325 additions and 7 deletions

View File

@@ -178,6 +178,24 @@ iris_binder_reserve_3d(struct iris_context *ice)
}
}
/**
 * Reserve binder space for the compute stage's binding table.
 *
 * Nothing happens unless IRIS_DIRTY_BINDINGS_CS is set in the context's
 * dirty flags, and nothing is reserved when the bound compute program's
 * binding table is empty.  Otherwise, the offset handed back by
 * iris_binder_reserve() is stored in bt_offset[MESA_SHADER_COMPUTE].
 */
void
iris_binder_reserve_compute(struct iris_context *ice)
{
   if (ice->state.dirty & IRIS_DIRTY_BINDINGS_CS) {
      struct brw_stage_prog_data *prog_data =
         ice->shaders.prog[MESA_SHADER_COMPUTE]->prog_data;
      const unsigned bt_bytes = prog_data->binding_table.size_bytes;

      if (bt_bytes != 0) {
         ice->state.binder.bt_offset[MESA_SHADER_COMPUTE] =
            iris_binder_reserve(ice, bt_bytes);
      }
   }
}
void
iris_init_binder(struct iris_context *ice)
{

View File

@@ -53,5 +53,6 @@ void iris_init_binder(struct iris_context *ice);
void iris_destroy_binder(struct iris_binder *binder);
uint32_t iris_binder_reserve(struct iris_context *ice, unsigned size);
void iris_binder_reserve_3d(struct iris_context *ice);
void iris_binder_reserve_compute(struct iris_context *ice);
#endif

View File

@@ -208,6 +208,8 @@ iris_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags)
genX_call(devinfo, init_blorp, ice);
ice->vtbl.init_render_context(screen, &ice->render_batch, &ice->vtbl,
&ice->dbg);
ice->vtbl.init_compute_context(screen, &ice->compute_batch, &ice->vtbl,
&ice->dbg);
return ctx;
}

View File

@@ -277,11 +277,18 @@ struct iris_vtable {
struct iris_batch *batch,
struct iris_vtable *vtbl,
struct pipe_debug_callback *dbg);
void (*init_compute_context)(struct iris_screen *screen,
struct iris_batch *batch,
struct iris_vtable *vtbl,
struct pipe_debug_callback *dbg);
void (*upload_render_state)(struct iris_context *ice,
struct iris_batch *batch,
const struct pipe_draw_info *draw);
void (*update_surface_base_address)(struct iris_batch *batch,
struct iris_binder *binder);
void (*upload_compute_state)(struct iris_context *ice,
struct iris_batch *batch,
const struct pipe_grid_info *grid);
void (*load_register_imm32)(struct iris_batch *batch, uint32_t reg,
uint32_t val);
void (*load_register_imm64)(struct iris_batch *batch, uint32_t reg,
@@ -326,6 +333,8 @@ struct iris_vtable {
struct brw_gs_prog_key *key);
void (*populate_fs_key)(const struct iris_context *ice,
struct brw_wm_prog_key *key);
void (*populate_cs_key)(const struct iris_context *ice,
struct brw_cs_prog_key *key);
};
/**
@@ -363,6 +372,9 @@ struct iris_context {
/** The main batch for rendering. */
struct iris_batch render_batch;
/** The batch for compute shader dispatch */
struct iris_batch compute_batch;
struct {
struct iris_uncompiled_shader *uncompiled[MESA_SHADER_STAGES];
struct iris_compiled_shader *prog[MESA_SHADER_STAGES];
@@ -471,6 +483,8 @@ void iris_init_program_functions(struct pipe_context *ctx);
void iris_init_resource_functions(struct pipe_context *ctx);
void iris_init_query_functions(struct pipe_context *ctx);
void iris_update_compiled_shaders(struct iris_context *ice);
void iris_update_compiled_compute_shader(struct iris_context *ice);
/* iris_blit.c */
void iris_blorp_surf_for_resource(struct blorp_surf *surf,
@@ -481,6 +495,7 @@ void iris_blorp_surf_for_resource(struct blorp_surf *surf,
/* iris_draw.c */
void iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
void iris_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
/* iris_pipe_control.c */

View File

@@ -74,7 +74,10 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
iris_batch_maybe_flush(batch, 1500);
// XXX: check if BOs are in use by the other batches (compute), if so flush
iris_update_draw_info(ice, info);
iris_update_compiled_shaders(ice);
iris_predraw_resolve_inputs(ice, batch);
@@ -89,3 +92,30 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
iris_postdraw_update_resolve_tracking(ice, batch);
}
/**
 * Dispatch a compute grid on the context's compute batch.
 *
 * The sequence is: make sure the batch has room, bring the compiled compute
 * shader up to date, reserve binder space for its binding table, update the
 * surface base address, and finally emit the compute state for \p info.
 * All dirty flags are cleared afterwards.
 */
void
iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_batch *compute_batch = &ice->compute_batch;

   /* With DEBUG_REEMIT set, flag every piece of state as dirty. */
   if (unlikely(INTEL_DEBUG & DEBUG_REEMIT))
      ice->state.dirty = ~0ull;

   iris_batch_maybe_flush(compute_batch, 1500);

   /* XXX: check if BOs are in use by the other batches (render),
    * if so flush.
    */

   /* XXX: this could be skipped unless (dirty & IRIS_DIRTY_UNCOMPILED_CS) */
   iris_update_compiled_compute_shader(ice);

   /* XXX: predraw resolves / cache flushing */

   iris_binder_reserve_compute(ice);
   ice->vtbl.update_surface_base_address(compute_batch, &ice->state.binder);
   ice->vtbl.upload_compute_state(ice, compute_batch, info);

   /* NOTE(review): this clears every dirty bit, not only the compute-related
    * ones -- confirm that state flagged for the render path is not lost.
    */
   ice->state.dirty = 0;

   /* XXX: postdraw resolve tracking */
}

View File

@@ -214,6 +214,7 @@ iris_create_uncompiled_shader(struct pipe_context *ctx,
}
// XXX: precompile!
// XXX: disallow more than 64KB of shared variables
return ish;
}
@@ -1004,6 +1005,58 @@ iris_update_compiled_shaders(struct iris_context *ice)
}
}
/**
 * Compile the compute shader \p ish for program key \p key.
 *
 * A temporary ralloc context holds the prog_data and everything the
 * compiler allocates; it is released before returning on every path.
 * On success the program is handed to iris_upload_and_bind_shader()
 * under IRIS_CACHE_CS.
 *
 * \return true if compilation succeeded, false otherwise (the compiler's
 *         error string is printed via dbg_printf in that case).
 */
static bool
iris_compile_cs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct brw_cs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;
   const struct brw_compiler *compiler = screen->compiler;
   nir_shader *nir = ish->nir;

   void *mem_ctx = ralloc_context(NULL);
   struct brw_cs_prog_data *cs_prog_data =
      rzalloc(mem_ctx, struct brw_cs_prog_data);
   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;

   /* The work-groups entry sits at binding table index 0; the common
    * entries are assigned starting at index 1.
    */
   cs_prog_data->binding_table.work_groups_start = 0;
   assign_common_binding_table_offsets(devinfo, nir, prog_data, 1);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data,
                     nir, -1, &error_str);
   const bool compiled = program != NULL;

   if (compiled) {
      iris_upload_and_bind_shader(ice, IRIS_CACHE_CS, key, program, prog_data,
                                  NULL);
   } else {
      dbg_printf("Failed to compile compute shader: %s\n", error_str);
   }

   ralloc_free(mem_ctx);
   return compiled;
}
/**
 * Make sure a compiled variant of the current compute shader is bound.
 *
 * Builds the program key for the uncompiled compute shader, tries to bind
 * a matching entry from the IRIS_CACHE_CS program cache, and compiles the
 * shader on a miss.
 */
void
iris_update_compiled_compute_shader(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];

   struct brw_cs_prog_key key = { .program_string_id = ish->program_id };
   ice->vtbl.populate_cs_key(ice, &key);

   if (!iris_bind_cached_shader(ice, IRIS_CACHE_CS, &key)) {
      /* XXX: a compile failure is currently ignored here. */
      (void) iris_compile_cs(ice, ish, &key);
   }
}
void
iris_init_program_functions(struct pipe_context *ctx)
{

View File

@@ -587,6 +587,10 @@ iris_screen_create(int fd)
slab_create_parent(&screen->transfer_pool,
sizeof(struct iris_transfer), 64);
screen->subslice_total =
iris_getparam_integer(screen, I915_PARAM_SUBSLICE_TOTAL);
assert(screen->subslice_total >= 1);
struct pipe_screen *pscreen = &screen->base;
iris_init_screen_resource_functions(pscreen);

View File

@@ -51,6 +51,8 @@ struct iris_screen {
/** Global program_string_id counter (see get_program_string_id()) */
unsigned program_id;
unsigned subslice_total;
struct gen_device_info devinfo;
struct isl_device isl_dev;
struct iris_bufmgr *bufmgr;

View File

@@ -610,6 +610,54 @@ iris_init_render_context(struct iris_screen *screen,
}
}
static void
iris_init_compute_context(struct iris_screen *screen,
struct iris_batch *batch,
struct iris_vtable *vtbl,
struct pipe_debug_callback *dbg)
{
iris_init_batch(batch, screen, vtbl, dbg, I915_EXEC_RENDER);
/* XXX: PIPE_CONTROLs */
iris_emit_cmd(batch, GENX(PIPELINE_SELECT), sel) {
sel.PipelineSelection = GPGPU;
}
iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
#if 0
// XXX: MOCS is stupid for this.
sba.GeneralStateMemoryObjectControlState = MOCS_WB;
sba.StatelessDataPortAccessMemoryObjectControlState = MOCS_WB;
sba.SurfaceStateMemoryObjectControlState = MOCS_WB;
sba.DynamicStateMemoryObjectControlState = MOCS_WB;
sba.IndirectObjectMemoryObjectControlState = MOCS_WB;
sba.InstructionMemoryObjectControlState = MOCS_WB;
sba.BindlessSurfaceStateMemoryObjectControlState = MOCS_WB;
#endif
sba.GeneralStateBaseAddressModifyEnable = true;
sba.SurfaceStateBaseAddressModifyEnable = true;
sba.DynamicStateBaseAddressModifyEnable = true;
sba.IndirectObjectBaseAddressModifyEnable = true;
sba.InstructionBaseAddressModifyEnable = true;
sba.GeneralStateBufferSizeModifyEnable = true;
sba.DynamicStateBufferSizeModifyEnable = true;
sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
sba.IndirectObjectBufferSizeModifyEnable = true;
sba.InstructionBuffersizeModifyEnable = true;
sba.InstructionBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SHADER_START);
sba.SurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SURFACE_START);
sba.DynamicStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_DYNAMIC_START);
sba.GeneralStateBufferSize = 0xfffff;
sba.IndirectObjectBufferSize = 0xfffff;
sba.InstructionBufferSize = 0xfffff;
sba.DynamicStateBufferSize = 0xfffff;
}
}
struct iris_vertex_buffer_state {
/** The 3DSTATE_VERTEX_BUFFERS hardware packet. */
uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)];
@@ -646,12 +694,6 @@ struct iris_genx_state {
uint32_t streamout[4 * GENX(3DSTATE_STREAMOUT_length)];
};
// XXX: move this to iris_draw.c
static void
iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
{
}
/**
* The pipe->set_blend_color() driver hook.
*
@@ -2826,6 +2868,13 @@ iris_populate_fs_key(const struct iris_context *ice,
// XXX: respect hint for high_quality_derivatives:1;
}
/**
 * Populate the compute shader program key.
 *
 * Only the sampler portion of the key (key->tex) is filled in here, by
 * delegating to iris_populate_sampler_key(); no other field is touched.
 */
static void
iris_populate_cs_key(const struct iris_context *ice,
struct brw_cs_prog_key *key)
{
iris_populate_sampler_key(ice, &key->tex);
}
#if 0
// XXX: these need to go in INIT_THREAD_DISPATCH_FIELDS
pkt.SamplerCount = \
@@ -3074,6 +3123,26 @@ iris_store_fs_state(const struct gen_device_info *devinfo,
*
* This must match the data written by the iris_store_xs_state() functions.
*/
static void
iris_store_cs_state(const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader)
{
struct brw_stage_prog_data *prog_data = shader->prog_data;
struct brw_cs_prog_data *cs_prog_data = (void *) shader->prog_data;
void *map = shader->derived_data;
iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), map, desc) {
desc.KernelStartPointer = KSP(shader);
desc.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
desc.NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads;
desc.SharedLocalMemorySize =
encode_slm_size(GEN_GEN, prog_data->total_shared);
desc.BarrierEnable = cs_prog_data->uses_barrier;
desc.CrossThreadConstantDataReadLength =
cs_prog_data->push.cross_thread.regs;
}
}
static unsigned
iris_derived_program_state_size(enum iris_program_cache_id cache_id)
{
@@ -3086,7 +3155,7 @@ iris_derived_program_state_size(enum iris_program_cache_id cache_id)
[IRIS_CACHE_GS] = GENX(3DSTATE_GS_length),
[IRIS_CACHE_FS] =
GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length),
[IRIS_CACHE_CS] = 0,
[IRIS_CACHE_CS] = GENX(INTERFACE_DESCRIPTOR_DATA_length),
[IRIS_CACHE_BLORP] = 0,
};
@@ -3121,6 +3190,7 @@ iris_store_derived_program_state(const struct gen_device_info *devinfo,
iris_store_fs_state(devinfo, shader);
break;
case IRIS_CACHE_CS:
iris_store_cs_state(devinfo, shader);
case IRIS_CACHE_BLORP:
break;
default:
@@ -4126,6 +4196,126 @@ iris_upload_render_state(struct iris_context *ice,
}
}
static void
iris_upload_compute_state(struct iris_context *ice,
struct iris_batch *batch,
const struct pipe_grid_info *grid)
{
const uint64_t dirty = ice->state.dirty;
struct iris_screen *screen = batch->screen;
const struct gen_device_info *devinfo = &screen->devinfo;
struct iris_binder *binder = &ice->state.binder;
struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
struct iris_compiled_shader *shader =
ice->shaders.prog[MESA_SHADER_COMPUTE];
struct brw_stage_prog_data *prog_data = shader->prog_data;
struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
if (dirty & IRIS_DIRTY_BINDINGS_CS)
iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);
iris_use_optional_res(batch, shs->sampler_table.res, false);
iris_use_pinned_bo(batch, iris_resource_bo(shader->assembly.res), false);
if (ice->state.need_border_colors)
iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false);
/* The MEDIA_VFE_STATE documentation for Gen8+ says:
*
* "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
* the only bits that are changed are scoreboard related: Scoreboard
* Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For
* these scoreboard related states, a MEDIA_STATE_FLUSH is sufficient."
*/
iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL);
iris_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) {
if (prog_data->total_scratch) {
/* Per Thread Scratch Space is in the range [0, 11] where
* 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
*/
// XXX: vfe.ScratchSpaceBasePointer
//vfe.PerThreadScratchSpace =
//ffs(stage_state->per_thread_scratch) - 11;
}
vfe.MaximumNumberofThreads =
devinfo->max_cs_threads * screen->subslice_total - 1;
#if GEN_GEN < 11
vfe.ResetGatewayTimer =
Resettingrelativetimerandlatchingtheglobaltimestamp;
#endif
vfe.NumberofURBEntries = 2;
vfe.URBEntryAllocationSize = 2;
// XXX: Use Indirect Payload Storage?
vfe.CURBEAllocationSize =
ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
cs_prog_data->push.cross_thread.regs, 2);
}
// XXX: hack iris_set_constant_buffers to upload compute shader constants
// XXX: differently...?
if (cs_prog_data->push.total.size > 0) {
iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
curbe.CURBETotalDataLength =
ALIGN(cs_prog_data->push.total.size, 64);
// XXX: curbe.CURBEDataStartAddress = stage_state->push_const_offset;
}
}
struct pipe_resource *desc_res = NULL;
uint32_t desc[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
idd.SamplerStatePointer = shs->sampler_table.offset;
idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
}
for (int i = 0; i < GENX(INTERFACE_DESCRIPTOR_DATA_length); i++)
desc[i] |= ((uint32_t *) shader->derived_data)[i];
iris_emit_cmd(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
load.InterfaceDescriptorTotalLength =
GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
load.InterfaceDescriptorDataStartAddress =
emit_state(batch, ice->state.dynamic_uploader,
&desc_res, desc, sizeof(desc), 32);
}
pipe_resource_reference(&desc_res, NULL);
// XXX: grid->indirect
uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2];
uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
uint32_t right_mask;
if (remainder > 0)
right_mask = ~0u >> (32 - remainder);
else
right_mask = ~0u >> (32 - cs_prog_data->simd_size);
iris_emit_cmd(batch, GENX(GPGPU_WALKER), ggw) {
ggw.SIMDSize = cs_prog_data->simd_size / 16;
ggw.ThreadDepthCounterMaximum = 0;
ggw.ThreadHeightCounterMaximum = 0;
ggw.ThreadWidthCounterMaximum = cs_prog_data->threads - 1;
ggw.ThreadGroupIDXDimension = grid->block[0];
ggw.ThreadGroupIDYDimension = grid->block[1];
ggw.ThreadGroupIDZDimension = grid->block[2];
ggw.RightExecutionMask = right_mask;
ggw.BottomExecutionMask = 0xffffffff;
}
if (!batch->contains_draw) {
//iris_restore_context_saved_bos(ice, batch, draw);
batch->contains_draw = true;
}
}
/**
* State module teardown.
*/
@@ -4729,8 +4919,10 @@ genX(init_state)(struct iris_context *ice)
ice->vtbl.destroy_state = iris_destroy_state;
ice->vtbl.init_render_context = iris_init_render_context;
ice->vtbl.init_compute_context = iris_init_compute_context;
ice->vtbl.upload_render_state = iris_upload_render_state;
ice->vtbl.update_surface_base_address = iris_update_surface_base_address;
ice->vtbl.upload_compute_state = iris_upload_compute_state;
ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
ice->vtbl.load_register_imm32 = iris_load_register_imm32;
ice->vtbl.load_register_imm64 = iris_load_register_imm64;
@@ -4749,6 +4941,7 @@ genX(init_state)(struct iris_context *ice)
ice->vtbl.populate_tes_key = iris_populate_tes_key;
ice->vtbl.populate_gs_key = iris_populate_gs_key;
ice->vtbl.populate_fs_key = iris_populate_fs_key;
ice->vtbl.populate_cs_key = iris_populate_cs_key;
ice->state.dirty = ~0ull;