iris: better ubo handling

This commit is contained in:
Kenneth Graunke
2018-06-06 02:16:52 -07:00
parent a504b98e72
commit 26cc609927
4 changed files with 90 additions and 82 deletions

View File

@@ -87,29 +87,6 @@ struct blorp_params;
#define IRIS_DIRTY_CONSTANTS_FS (1ull << 39) #define IRIS_DIRTY_CONSTANTS_FS (1ull << 39)
#define IRIS_DIRTY_DEPTH_BUFFER (1ull << 40) #define IRIS_DIRTY_DEPTH_BUFFER (1ull << 40)
/**
 * Domain tag carried in the top byte (bits 31:24) of an encoded 32-bit
 * param word.  It selects how the remaining 24-bit value (bits 23:0) is
 * interpreted by the BRW_PARAM_* accessors below.
 */
enum brw_param_domain {
   BRW_PARAM_DOMAIN_BUILTIN = 0,
   BRW_PARAM_DOMAIN_PARAMETER,
   BRW_PARAM_DOMAIN_UNIFORM,
   BRW_PARAM_DOMAIN_IMAGE,
};

/* Pack a domain tag together with a 24-bit value.  `domain` is the
 * enumerator suffix (BUILTIN, PARAMETER, UNIFORM, IMAGE), not the full
 * enumerator name.  `val` is not masked here, so it must fit in 24 bits
 * or it will spill into the domain byte.
 */
#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val))

/* Split an encoded param word back into its domain tag and 24-bit value. */
#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24)
#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff)

/* PARAMETER domain: value = (idx << 2) | comp, i.e. a 2-bit component
 * selector in the low bits with the index above it.
 */
#define BRW_PARAM_PARAMETER(idx, comp) \
   BRW_PARAM(PARAMETER, ((idx) << 2) | (comp))
#define BRW_PARAM_PARAMETER_IDX(param) (BRW_PARAM_VALUE(param) >> 2)
#define BRW_PARAM_PARAMETER_COMP(param) (BRW_PARAM_VALUE(param) & 0x3)

/* UNIFORM domain: the value is the index itself. */
#define BRW_PARAM_UNIFORM(idx) BRW_PARAM(UNIFORM, (idx))
#define BRW_PARAM_UNIFORM_IDX(param) BRW_PARAM_VALUE(param)

/* IMAGE domain: value = (idx << 8) | offset, so the offset owns the low
 * 8 bits.  The decoder masks with 0xff to match that field width; the
 * previous 0xf mask silently truncated any offset of 16 or more.  Words
 * whose offset is below 16 decode exactly as before.
 */
#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8)
#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xff)
struct iris_depth_stencil_alpha_state; struct iris_depth_stencil_alpha_state;
enum iris_program_cache_id { enum iris_program_cache_id {
@@ -195,9 +172,8 @@ struct iris_compiled_shader {
/*
 * Constant-buffer bookkeeping kept once per shader stage.
 *
 * Both sides of the change are visible on these lines: the single
 * push_resource pointer becomes const_resources[], one pipe_resource
 * slot per constant-buffer index, and const_size goes away.
 * const_offset stays a single value rather than one per slot.
 */
struct iris_shader_state { struct iris_shader_state {
struct pipe_constant_buffer constbuf[PIPE_MAX_CONSTANT_BUFFERS]; struct pipe_constant_buffer constbuf[PIPE_MAX_CONSTANT_BUFFERS];
struct pipe_resource *push_resource; struct pipe_resource *const_resources[PIPE_MAX_CONSTANT_BUFFERS];
unsigned const_offset; unsigned const_offset;
unsigned const_size;
}; };
struct iris_vtable { struct iris_vtable {

View File

@@ -64,19 +64,6 @@ iris_create_shader_state(struct pipe_context *ctx,
nir = brw_preprocess_nir(screen->compiler, nir); nir = brw_preprocess_nir(screen->compiler, nir);
#if 0
/* Reassign uniform locations using type_size_scalar_bytes instead of
* the slot based calculation that st_nir uses.
*/
nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
type_size_scalar_bytes);
nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
#endif
nir_foreach_variable(var, &nir->uniforms) {
var->data.driver_location *= 4;
}
nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
ish->program_id = get_new_program_id(screen); ish->program_id = get_new_program_id(screen);
ish->base.type = PIPE_SHADER_IR_NIR; ish->base.type = PIPE_SHADER_IR_NIR;
ish->base.ir.nir = nir; ish->base.ir.nir = nir;
@@ -154,10 +141,12 @@ iris_bind_fs_state(struct pipe_context *ctx, void *hwcso)
*/ */
static uint32_t static uint32_t
assign_common_binding_table_offsets(const struct gen_device_info *devinfo, assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
const struct shader_info *info, const struct nir_shader *nir,
struct brw_stage_prog_data *prog_data, struct brw_stage_prog_data *prog_data,
uint32_t next_binding_table_offset) uint32_t next_binding_table_offset)
{ {
const struct shader_info *info = &nir->info;
if (info->num_textures) { if (info->num_textures) {
prog_data->binding_table.texture_start = next_binding_table_offset; prog_data->binding_table.texture_start = next_binding_table_offset;
prog_data->binding_table.gather_texture_start = next_binding_table_offset; prog_data->binding_table.gather_texture_start = next_binding_table_offset;
@@ -167,10 +156,12 @@ assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
} }
if (info->num_ubos) { int num_ubos = info->num_ubos + (nir->num_uniforms > 0 ? 1 : 0);
if (num_ubos) {
//assert(info->num_ubos <= BRW_MAX_UBO); //assert(info->num_ubos <= BRW_MAX_UBO);
prog_data->binding_table.ubo_start = next_binding_table_offset; prog_data->binding_table.ubo_start = next_binding_table_offset;
next_binding_table_offset += info->num_ubos; next_binding_table_offset += num_ubos;
} else { } else {
prog_data->binding_table.ubo_start = 0xd0d0d0d0; prog_data->binding_table.ubo_start = 0xd0d0d0d0;
} }
@@ -213,28 +204,41 @@ assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
} }
/*
 * Fill in prog_data->param for the shader's uniforms.
 *
 * After the change (right-hand text): nr_params is taken directly from
 * nir->num_uniforms, the param array is allocated with rzalloc_array
 * from mem_ctx, each uniform variable stores its driver_location into
 * the slot at that same index, and brw_nir_analyze_ubo_ranges() is then
 * called to fill prog_data->ubo_ranges.
 *
 * Before the change (left-hand text): nr_params was num_uniforms * 4,
 * num_uniforms was then scaled by 16, and slots were written with
 * BRW_PARAM_PARAMETER(...) or BRW_PARAM_BUILTIN_ZERO.
 */
static void static void
iris_setup_uniforms(void *mem_ctx, iris_setup_uniforms(const struct brw_compiler *compiler,
void *mem_ctx,
nir_shader *nir, nir_shader *nir,
struct brw_stage_prog_data *prog_data) struct brw_stage_prog_data *prog_data)
{ {
prog_data->nr_params = nir->num_uniforms * 4; prog_data->nr_params = nir->num_uniforms;
prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params); prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
nir->num_uniforms *= 16;
nir_foreach_variable(var, &nir->uniforms) { nir_foreach_variable(var, &nir->uniforms) {
/* UBO's, atomics and samplers don't take up space */
//if (var->interface_type != NULL || var->type->contains_atomic())
//continue;
const unsigned components = glsl_get_components(var->type); const unsigned components = glsl_get_components(var->type);
/* NOTE(review): on both sides the store below always targets
 * param[var->data.driver_location]; the index never involves i.  Each
 * iteration therefore overwrites the same slot and only the last write
 * survives.  Presumably driver_location + i was intended -- confirm.
 */
for (unsigned i = 0; i < 4; i++) { for (unsigned i = 0; i < components; i++) {
prog_data->param[var->data.driver_location] = prog_data->param[var->data.driver_location] =
i < components ? BRW_PARAM_PARAMETER(var->data.driver_location, i) var->data.driver_location;
: BRW_PARAM_BUILTIN_ZERO;
} }
} }
// XXX: vs clip planes?
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
}
/*
 * Reserve ubo_ranges[0] for the uniforms described by prog_data->param.
 *
 * Does nothing when prog_data->nr_params is zero.  Otherwise entries
 * [0..2] each move up one slot (whatever was in [3] is overwritten) and
 * slot 0 becomes a range on block 0, starting at 0, whose length is
 * DIV_ROUND_UP(nr_params, 8) -- presumably counted in 32-byte units of
 * eight 32-bit params each; TODO confirm against brw_ubo_range.
 *
 * `compiler` is accepted but not used in this body.
 */
static void
iris_setup_push_uniform_range(const struct brw_compiler *compiler,
struct brw_stage_prog_data *prog_data)
{
if (prog_data->nr_params) {
/* Walk downward so every entry is copied before it is overwritten. */
for (int i = 3; i > 0; i--)
prog_data->ubo_ranges[i] = prog_data->ubo_ranges[i - 1];
prog_data->ubo_ranges[0] = (struct brw_ubo_range) {
.block = 0,
.start = 0,
.length = DIV_ROUND_UP(prog_data->nr_params, 8),
};
}
} }
static bool static bool
@@ -256,9 +260,9 @@ iris_compile_vs(struct iris_context *ice,
nir_shader *nir = ish->base.ir.nir; nir_shader *nir = ish->base.ir.nir;
// XXX: alt mode // XXX: alt mode
assign_common_binding_table_offsets(devinfo, &nir->info, prog_data, 0); assign_common_binding_table_offsets(devinfo, nir, prog_data, 0);
iris_setup_uniforms(mem_ctx, nir, prog_data); iris_setup_uniforms(compiler, mem_ctx, nir, prog_data);
brw_compute_vue_map(devinfo, brw_compute_vue_map(devinfo,
&vue_prog_data->vue_map, nir->info.outputs_written, &vue_prog_data->vue_map, nir->info.outputs_written,
@@ -274,6 +278,8 @@ iris_compile_vs(struct iris_context *ice,
return false; return false;
} }
iris_setup_push_uniform_range(compiler, prog_data);
iris_upload_and_bind_shader(ice, IRIS_CACHE_VS, key, program, prog_data); iris_upload_and_bind_shader(ice, IRIS_CACHE_VS, key, program, prog_data);
ralloc_free(mem_ctx); ralloc_free(mem_ctx);
@@ -317,7 +323,7 @@ iris_compile_tes(struct iris_context *ice,
nir_shader *nir = ish->base.ir.nir; nir_shader *nir = ish->base.ir.nir;
assign_common_binding_table_offsets(devinfo, &nir->info, prog_data, 0); assign_common_binding_table_offsets(devinfo, nir, prog_data, 0);
struct brw_vue_map input_vue_map; struct brw_vue_map input_vue_map;
brw_compute_tess_vue_map(&input_vue_map, key->inputs_read, brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
@@ -333,6 +339,8 @@ iris_compile_tes(struct iris_context *ice,
return false; return false;
} }
iris_setup_push_uniform_range(compiler, prog_data);
iris_upload_and_bind_shader(ice, IRIS_CACHE_TES, key, program, prog_data); iris_upload_and_bind_shader(ice, IRIS_CACHE_TES, key, program, prog_data);
ralloc_free(mem_ctx); ralloc_free(mem_ctx);
@@ -383,10 +391,10 @@ iris_compile_fs(struct iris_context *ice,
nir_shader *nir = ish->base.ir.nir; nir_shader *nir = ish->base.ir.nir;
// XXX: alt mode // XXX: alt mode
assign_common_binding_table_offsets(devinfo, &nir->info, prog_data, assign_common_binding_table_offsets(devinfo, nir, prog_data,
MAX2(key->nr_color_regions, 1)); MAX2(key->nr_color_regions, 1));
iris_setup_uniforms(mem_ctx, nir, prog_data); iris_setup_uniforms(compiler, mem_ctx, nir, prog_data);
char *error_str = NULL; char *error_str = NULL;
const unsigned *program = const unsigned *program =
@@ -400,6 +408,8 @@ iris_compile_fs(struct iris_context *ice,
//brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch); //brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch);
iris_setup_push_uniform_range(compiler, prog_data);
iris_upload_and_bind_shader(ice, IRIS_CACHE_FS, key, program, prog_data); iris_upload_and_bind_shader(ice, IRIS_CACHE_FS, key, program, prog_data);
ralloc_free(mem_ctx); ralloc_free(mem_ctx);

View File

@@ -139,6 +139,7 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
case PIPE_CAP_CULL_DISTANCE: case PIPE_CAP_CULL_DISTANCE:
case PIPE_CAP_PACKED_UNIFORMS:
return true; return true;
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
@@ -154,7 +155,6 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_FENCE_SIGNAL: case PIPE_CAP_FENCE_SIGNAL:
case PIPE_CAP_CONSTBUF0_FLAGS: case PIPE_CAP_CONSTBUF0_FLAGS:
case PIPE_CAP_PACKED_UNIFORMS:
case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES: case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES:
case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES: case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES:
case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES: case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES:

View File

@@ -1340,12 +1340,23 @@ iris_set_framebuffer_state(struct pipe_context *ctx,
/*
 * Record the constant buffer bound at `index` for shader stage `p_stage`.
 *
 * After the change (right-hand text), for the stage's iris_shader_state:
 *  - if the input carries a user_buffer, its buffer_size bytes are
 *    uploaded through ctx->const_uploader with u_upload_data(), which
 *    receives &shs->const_offset and &shs->const_resources[index] as
 *    its outputs; user_buffer is checked first, so it wins when both
 *    pointers are set;
 *  - otherwise, if it carries a buffer, const_resources[index] is made
 *    to reference that buffer;
 *  - a NULL input, or one with neither pointer set, clears
 *    const_resources[index].
 *
 * Before the change (left-hand text) the input was simply copied into
 * constbuf[index] with util_copy_constant_buffer(); the new text no
 * longer writes constbuf[] in this function.
 *
 * NOTE(review): const_offset is a single per-stage field, yet it is
 * rewritten by every user_buffer upload whatever `index` is, and it is
 * left untouched on the `buffer` path.  Any offset the input itself may
 * carry is not consulted.  Confirm this is intended before relying on
 * more than one constant buffer per stage.
 */
static void static void
iris_set_constant_buffer(struct pipe_context *ctx, iris_set_constant_buffer(struct pipe_context *ctx,
enum pipe_shader_type p_stage, unsigned index, enum pipe_shader_type p_stage, unsigned index,
const struct pipe_constant_buffer *cb) const struct pipe_constant_buffer *input)
{ {
struct iris_context *ice = (struct iris_context *) ctx; struct iris_context *ice = (struct iris_context *) ctx;
gl_shader_stage stage = stage_from_pipe(p_stage); gl_shader_stage stage = stage_from_pipe(p_stage);
struct iris_shader_state *shs = &ice->shaders.state[stage];
util_copy_constant_buffer(&ice->shaders.state[stage].constbuf[index], cb); if (input && (input->buffer || input->user_buffer)) {
if (input->user_buffer) {
u_upload_data(ctx->const_uploader, 0, input->buffer_size, 32,
input->user_buffer, &shs->const_offset,
&shs->const_resources[index]);
} else {
pipe_resource_reference(&shs->const_resources[index], input->buffer);
}
} else {
pipe_resource_reference(&shs->const_resources[index], NULL);
}
} }
static void static void
@@ -2170,36 +2181,47 @@ iris_upload_render_state(struct iris_context *ice,
if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage))) if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage)))
continue; continue;
struct pipe_constant_buffer *cbuf0 =
&ice->shaders.state[stage].constbuf[0];
if (!ice->shaders.prog[stage] || cbuf0->buffer || !cbuf0->buffer_size)
continue;
struct iris_shader_state *shs = &ice->shaders.state[stage]; struct iris_shader_state *shs = &ice->shaders.state[stage];
struct iris_compiled_shader *shader = ice->shaders.prog[stage]; struct iris_compiled_shader *shader = ice->shaders.prog[stage];
struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
// XXX: DIV_ROUND_UP(prog_data->nr_params, 8)?
//shs->const_size = DIV_ROUND_UP(cbuf0->buffer_size, 32);
shs->const_size = DIV_ROUND_UP(prog_data->nr_params, 8);
u_upload_data(ice->ctx.const_uploader, 0, 32 * shs->const_size, 32,
cbuf0->user_buffer, &shs->const_offset,
&shs->push_resource);
}
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { if (!shader)
// XXX: wrong dirty tracking...
if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage)))
continue; continue;
struct iris_shader_state *shs = &ice->shaders.state[stage]; struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
struct iris_resource *res = (void *) shs->push_resource;
iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) { iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) {
pkt._3DCommandSubOpcode = push_constant_opcodes[stage]; pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
if (res) { if (prog_data) {
pkt.ConstantBody.ReadLength[3] = shs->const_size; /* The Skylake PRM contains the following restriction:
pkt.ConstantBody.Buffer[3] = ro_bo(res->bo, shs->const_offset); *
* "The driver must ensure The following case does not occur
* without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
* buffer 3 read length equal to zero committed followed by a
* 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
* zero committed."
*
* To avoid this, we program the buffers in the highest slots.
* This way, slot 0 is only used if slot 3 is also used.
*/
int n = 3;
for (int i = 3; i >= 0; i--) {
const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
if (range->length == 0)
continue;
// XXX: is range->block a constbuf index? it would be nice
struct iris_resource *res =
(void *) shs->const_resources[range->block];
assert(shs->const_offset % 32 == 0);
pkt.ConstantBody.ReadLength[n] = range->length;
pkt.ConstantBody.Buffer[n] =
ro_bo(res->bo, range->start * 32 + shs->const_offset);
n--;
}
} }
} }
} }