intel/fs: Add support for compiling bindless shaders with resume shaders

Instead of depending on the driver to compile each resume shader
separately, we compile them all in one go in the back-end and build an
SBT as part of the shader program.  Shader relocs are used to make the
entries in the SBT point to the correct resume shader.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8637>
This commit is contained in:
Jason Ekstrand
2020-09-04 12:40:06 -05:00
committed by Marge Bot
parent d055ac9bdf
commit 705395344d
4 changed files with 130 additions and 20 deletions

View File

@@ -678,6 +678,7 @@ enum brw_param_builtin {
enum brw_shader_reloc_id { enum brw_shader_reloc_id {
BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
BRW_SHADER_RELOC_SHADER_START_OFFSET,
}; };
enum brw_shader_reloc_type { enum brw_shader_reloc_type {
@@ -1062,8 +1063,15 @@ brw_cs_prog_data_prog_offset(const struct brw_cs_prog_data *prog_data,
struct brw_bs_prog_data { struct brw_bs_prog_data {
struct brw_stage_prog_data base; struct brw_stage_prog_data base;
/** SIMD size of the root shader */
uint8_t simd_size; uint8_t simd_size;
uint32_t stack_size;
/** Maximum stack size of all shaders */
uint32_t max_stack_size;
/** Offset into the shader where the resume SBT is located */
uint32_t resume_sbt_offset;
}; };
struct brw_ff_gs_prog_data { struct brw_ff_gs_prog_data {
@@ -1675,6 +1683,8 @@ brw_compile_bs(const struct brw_compiler *compiler, void *log_data,
const struct brw_bs_prog_key *key, const struct brw_bs_prog_key *key,
struct brw_bs_prog_data *prog_data, struct brw_bs_prog_data *prog_data,
struct nir_shader *shader, struct nir_shader *shader,
unsigned num_resume_shaders,
struct nir_shader **resume_shaders,
struct brw_compile_stats *stats, struct brw_compile_stats *stats,
char **error_str); char **error_str);

View File

@@ -9875,19 +9875,22 @@ brw_cs_get_dispatch_info(const struct intel_device_info *devinfo,
return info; return info;
} }
const unsigned * static uint8_t
brw_compile_bs(const struct brw_compiler *compiler, void *log_data, compile_single_bs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx, void *mem_ctx,
const struct brw_bs_prog_key *key, const struct brw_bs_prog_key *key,
struct brw_bs_prog_data *prog_data, struct brw_bs_prog_data *prog_data,
nir_shader *shader, nir_shader *shader,
struct brw_compile_stats *stats, fs_generator *g,
char **error_str) struct brw_compile_stats *stats,
int *prog_offset,
char **error_str)
{ {
const bool debug_enabled = INTEL_DEBUG & DEBUG_RT; const bool debug_enabled = INTEL_DEBUG & DEBUG_RT;
prog_data->base.stage = shader->info.stage; prog_data->base.stage = shader->info.stage;
prog_data->stack_size = shader->scratch_size; prog_data->max_stack_size = MAX2(prog_data->max_stack_size,
shader->scratch_size);
const unsigned max_dispatch_width = 16; const unsigned max_dispatch_width = 16;
brw_nir_apply_key(shader, compiler, &key->base, max_dispatch_width, true); brw_nir_apply_key(shader, compiler, &key->base, max_dispatch_width, true);
@@ -9897,6 +9900,7 @@ brw_compile_bs(const struct brw_compiler *compiler, void *log_data,
fs_visitor *v = NULL, *v8 = NULL, *v16 = NULL; fs_visitor *v = NULL, *v8 = NULL, *v16 = NULL;
bool has_spilled = false; bool has_spilled = false;
uint8_t simd_size = 0;
if (likely(!(INTEL_DEBUG & DEBUG_NO8))) { if (likely(!(INTEL_DEBUG & DEBUG_NO8))) {
v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base, v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
&prog_data->base, shader, &prog_data->base, shader,
@@ -9906,10 +9910,10 @@ brw_compile_bs(const struct brw_compiler *compiler, void *log_data,
if (error_str) if (error_str)
*error_str = ralloc_strdup(mem_ctx, v8->fail_msg); *error_str = ralloc_strdup(mem_ctx, v8->fail_msg);
delete v8; delete v8;
return NULL; return 0;
} else { } else {
v = v8; v = v8;
prog_data->simd_size = 8; simd_size = 8;
if (v8->spilled_any_registers) if (v8->spilled_any_registers)
has_spilled = true; has_spilled = true;
} }
@@ -9932,11 +9936,11 @@ brw_compile_bs(const struct brw_compiler *compiler, void *log_data,
v16->fail_msg); v16->fail_msg);
} }
delete v16; delete v16;
return NULL; return 0;
} }
} else { } else {
v = v16; v = v16;
prog_data->simd_size = 16; simd_size = 16;
if (v16->spilled_any_registers) if (v16->spilled_any_registers)
has_spilled = true; has_spilled = true;
} }
@@ -9948,13 +9952,55 @@ brw_compile_bs(const struct brw_compiler *compiler, void *log_data,
*error_str = ralloc_strdup(mem_ctx, *error_str = ralloc_strdup(mem_ctx,
"Cannot satisfy INTEL_DEBUG flags SIMD restrictions"); "Cannot satisfy INTEL_DEBUG flags SIMD restrictions");
} }
return NULL; return false;
} }
assert(v); assert(v);
int offset = g->generate_code(v->cfg, simd_size, v->shader_stats,
v->performance_analysis.require(), stats);
if (prog_offset)
*prog_offset = offset;
else
assert(offset == 0);
delete v8;
delete v16;
return simd_size;
}
/**
 * Pack a shader offset, a SIMD width and a local-argument offset into a
 * single 64-bit value (presumably a "bindless shader record" -- confirm
 * against the hardware documentation).
 *
 * Layout produced by this function:
 *   - the 64-aligned \p offset is OR'd in unchanged,
 *   - bit 4 is set when \p simd_size is greater than 8,
 *   - bits 2:0 hold \p local_arg_offset divided by 8.
 *
 * \param devinfo           not read by this function
 * \param offset            shader offset; must be a multiple of 64
 * \param simd_size         must be 8 or 16
 * \param local_arg_offset  must be a multiple of 8
 * \return the packed value
 */
uint64_t
brw_bsr(const struct intel_device_info *devinfo,
        uint32_t offset, uint8_t simd_size, uint8_t local_arg_offset)
{
   assert(offset % 64 == 0);
   assert(simd_size == 8 || simd_size == 16);
   assert(local_arg_offset % 8 == 0);

   /* The alignment checks above guarantee the fields below do not collide
    * with the low bits of the offset.
    */
   const bool wide_dispatch = simd_size > 8;
   const int local_arg_field = local_arg_offset / 8;

   return offset |
          SET_BITS(wide_dispatch, 4, 4) |
          SET_BITS(local_arg_field, 2, 0);
}
const unsigned *
brw_compile_bs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const struct brw_bs_prog_key *key,
struct brw_bs_prog_data *prog_data,
nir_shader *shader,
unsigned num_resume_shaders,
struct nir_shader **resume_shaders,
struct brw_compile_stats *stats,
char **error_str)
{
const bool debug_enabled = INTEL_DEBUG & DEBUG_RT;
prog_data->base.stage = shader->info.stage;
prog_data->max_stack_size = 0;
fs_generator g(compiler, log_data, mem_ctx, &prog_data->base, fs_generator g(compiler, log_data, mem_ctx, &prog_data->base,
v->runtime_check_aads_emit, shader->info.stage); false, shader->info.stage);
if (unlikely(debug_enabled)) { if (unlikely(debug_enabled)) {
char *name = ralloc_asprintf(mem_ctx, "%s %s shader %s", char *name = ralloc_asprintf(mem_ctx, "%s %s shader %s",
shader->info.label ? shader->info.label ?
@@ -9964,13 +10010,48 @@ brw_compile_bs(const struct brw_compiler *compiler, void *log_data,
g.enable_debug(name); g.enable_debug(name);
} }
g.generate_code(v->cfg, prog_data->simd_size, v->shader_stats, prog_data->simd_size =
v->performance_analysis.require(), stats); compile_single_bs(compiler, log_data, mem_ctx, key, prog_data,
shader, &g, stats, NULL, error_str);
if (prog_data->simd_size == 0)
return NULL;
delete v8; uint64_t *resume_sbt = ralloc_array(mem_ctx, uint64_t, num_resume_shaders);
delete v16; for (unsigned i = 0; i < num_resume_shaders; i++) {
if (INTEL_DEBUG & DEBUG_RT) {
char *name = ralloc_asprintf(mem_ctx, "%s %s resume(%u) shader %s",
shader->info.label ?
shader->info.label : "unnamed",
gl_shader_stage_name(shader->info.stage),
i, shader->info.name);
g.enable_debug(name);
}
/* TODO: Figure out shader stats etc. for resume shaders */
int offset = 0;
uint8_t simd_size =
compile_single_bs(compiler, log_data, mem_ctx, key, prog_data,
resume_shaders[i], &g, NULL, &offset, error_str);
if (simd_size == 0)
return NULL;
assert(offset > 0);
resume_sbt[i] = brw_bsr(compiler->devinfo, offset, simd_size, 0);
}
/* We only have one constant data so we want to make sure they're all the
* same.
*/
for (unsigned i = 0; i < num_resume_shaders; i++) {
assert(resume_shaders[i]->constant_data_size ==
shader->constant_data_size);
assert(memcmp(resume_shaders[i]->constant_data,
shader->constant_data,
shader->constant_data_size) == 0);
}
g.add_const_data(shader->constant_data, shader->constant_data_size); g.add_const_data(shader->constant_data, shader->constant_data_size);
g.add_resume_sbt(num_resume_shaders, resume_sbt);
return g.get_assembly(); return g.get_assembly();
} }

View File

@@ -478,6 +478,7 @@ public:
const brw::performance &perf, const brw::performance &perf,
struct brw_compile_stats *stats); struct brw_compile_stats *stats);
void add_const_data(void *data, unsigned size); void add_const_data(void *data, unsigned size);
void add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt);
const unsigned *get_assembly(); const unsigned *get_assembly();
private: private:

View File

@@ -2805,6 +2805,24 @@ fs_generator::add_const_data(void *data, unsigned size)
} }
} }
/**
 * Append the resume SBT to the generated program and register one
 * relocation per entry.
 *
 * When \p num_resume_shaders is zero nothing is appended and
 * resume_sbt_offset is left untouched.
 *
 * \param num_resume_shaders  number of 64-bit entries in \p sbt
 * \param sbt                 table of entries, one per resume shader
 */
void
fs_generator::add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt)
{
   assert(brw_shader_stage_is_bindless(stage));
   struct brw_bs_prog_data *bs_prog_data = brw_bs_prog_data(prog_data);

   if (num_resume_shaders == 0)
      return;

   /* Record where the table landed in the program so it can be found later.
    * NOTE(review): the trailing 32 is presumably the required alignment of
    * the appended data -- confirm against brw_append_data().
    */
   bs_prog_data->resume_sbt_offset =
      brw_append_data(p, sbt, num_resume_shaders * sizeof(uint64_t), 32);

   for (unsigned entry = 0; entry < num_resume_shaders; entry++) {
      const size_t entry_offset =
         bs_prog_data->resume_sbt_offset + entry * sizeof(uint64_t);
      assert(entry_offset <= UINT32_MAX);

      /* A 32-bit reloc is emitted at the entry's location; only the low
       * 32 bits of the entry are handed to the reloc.
       */
      brw_add_reloc(p, BRW_SHADER_RELOC_SHADER_START_OFFSET,
                    BRW_SHADER_RELOC_TYPE_U32,
                    (uint32_t)entry_offset, (uint32_t)sbt[entry]);
   }
}
const unsigned * const unsigned *
fs_generator::get_assembly() fs_generator::get_assembly()
{ {