i965: Push UBO data, but don't use it just yet.
This patch starts uploading UBO data via 3DSTATE_CONSTANT_* packets, and updates the compiler to know that there's extra payload data, so things continue working. However, it still issues pull loads for all data.

I wanted to separate the two aspects for greater bisectability.

v2: Update for new intel_bufferobj_buffer parameter.

Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
@@ -1383,7 +1383,14 @@ fs_visitor::emit_gs_thread_end()
|
|||||||
void
|
void
|
||||||
fs_visitor::assign_curb_setup()
|
fs_visitor::assign_curb_setup()
|
||||||
{
|
{
|
||||||
prog_data->curb_read_length = ALIGN(stage_prog_data->nr_params, 8) / 8;
|
unsigned uniform_push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8);
|
||||||
|
|
||||||
|
unsigned ubo_push_length = 0;
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
ubo_push_length += stage_prog_data->ubo_ranges[i].length;
|
||||||
|
}
|
||||||
|
|
||||||
|
prog_data->curb_read_length = uniform_push_length + ubo_push_length;
|
||||||
|
|
||||||
/* Map the offsets in the UNIFORM file to fixed HW regs. */
|
/* Map the offsets in the UNIFORM file to fixed HW regs. */
|
||||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||||
|
@@ -1776,6 +1776,9 @@ vec4_visitor::setup_uniforms(int reg)
|
|||||||
reg += ALIGN(uniforms, 2) / 2;
|
reg += ALIGN(uniforms, 2) / 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; i++)
|
||||||
|
reg += stage_prog_data->ubo_ranges[i].length;
|
||||||
|
|
||||||
stage_prog_data->nr_params = this->uniforms * 4;
|
stage_prog_data->nr_params = this->uniforms * 4;
|
||||||
|
|
||||||
prog_data->base.curb_read_length =
|
prog_data->base.curb_read_length =
|
||||||
|
@@ -1446,6 +1446,8 @@ brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
stage_state->push_constants_dirty = true;
|
||||||
|
|
||||||
if (prog->info.num_ubos || prog->info.num_ssbos)
|
if (prog->info.num_ubos || prog->info.num_ssbos)
|
||||||
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
|
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
|
||||||
}
|
}
|
||||||
|
@@ -2847,6 +2847,8 @@ UNUSED static const uint32_t push_constant_opcodes[] = {
|
|||||||
static void
|
static void
|
||||||
genX(upload_push_constant_packets)(struct brw_context *brw)
|
genX(upload_push_constant_packets)(struct brw_context *brw)
|
||||||
{
|
{
|
||||||
|
struct gl_context *ctx = &brw->ctx;
|
||||||
|
|
||||||
UNUSED uint32_t mocs = GEN_GEN < 8 ? GEN7_MOCS_L3 : 0;
|
UNUSED uint32_t mocs = GEN_GEN < 8 ? GEN7_MOCS_L3 : 0;
|
||||||
|
|
||||||
struct brw_stage_state *stage_states[] = {
|
struct brw_stage_state *stage_states[] = {
|
||||||
@@ -2863,19 +2865,71 @@ genX(upload_push_constant_packets)(struct brw_context *brw)
|
|||||||
|
|
||||||
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
|
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
|
||||||
struct brw_stage_state *stage_state = stage_states[stage];
|
struct brw_stage_state *stage_state = stage_states[stage];
|
||||||
bool active = stage_state->prog_data && stage_state->push_const_size > 0;
|
struct gl_program *prog = ctx->_Shader->CurrentProgram[stage];
|
||||||
|
|
||||||
if (!stage_state->push_constants_dirty)
|
if (!stage_state->push_constants_dirty)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) {
|
brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) {
|
||||||
pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
|
pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
|
||||||
if (active) {
|
if (stage_state->prog_data) {
|
||||||
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
pkt.ConstantBody.ReadLength[2] = stage_state->push_const_size;
|
/* The Skylake PRM contains the following restriction:
|
||||||
pkt.ConstantBody.Buffer[2] =
|
*
|
||||||
render_ro_bo(stage_state->push_const_bo,
|
* "The driver must ensure The following case does not occur
|
||||||
stage_state->push_const_offset);
|
* without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
|
||||||
|
* buffer 3 read length equal to zero committed followed by a
|
||||||
|
* 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
|
||||||
|
* zero committed."
|
||||||
|
*
|
||||||
|
* To avoid this, we program the buffers in the highest slots.
|
||||||
|
* This way, slot 0 is only used if slot 3 is also used.
|
||||||
|
*/
|
||||||
|
int n = 3;
|
||||||
|
|
||||||
|
for (int i = 3; i >= 0; i--) {
|
||||||
|
const struct brw_ubo_range *range =
|
||||||
|
&stage_state->prog_data->ubo_ranges[i];
|
||||||
|
|
||||||
|
if (range->length == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const struct gl_uniform_block *block =
|
||||||
|
prog->sh.UniformBlocks[range->block];
|
||||||
|
const struct gl_uniform_buffer_binding *binding =
|
||||||
|
&ctx->UniformBufferBindings[block->Binding];
|
||||||
|
|
||||||
|
if (binding->BufferObject == ctx->Shared->NullBufferObj) {
|
||||||
|
static unsigned msg_id = 0;
|
||||||
|
_mesa_gl_debug(ctx, &msg_id, MESA_DEBUG_SOURCE_API,
|
||||||
|
MESA_DEBUG_TYPE_UNDEFINED,
|
||||||
|
MESA_DEBUG_SEVERITY_HIGH,
|
||||||
|
"UBO %d unbound, %s shader uniform data "
|
||||||
|
"will be undefined.",
|
||||||
|
range->block,
|
||||||
|
_mesa_shader_stage_to_string(stage));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(binding->Offset % 32 == 0);
|
||||||
|
|
||||||
|
struct brw_bo *bo = intel_bufferobj_buffer(brw,
|
||||||
|
intel_buffer_object(binding->BufferObject),
|
||||||
|
binding->Offset, range->length * 32, false);
|
||||||
|
|
||||||
|
pkt.ConstantBody.ReadLength[n] = range->length;
|
||||||
|
pkt.ConstantBody.Buffer[n] =
|
||||||
|
render_ro_bo(bo, range->start * 32 + binding->Offset);
|
||||||
|
n--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stage_state->push_const_size > 0) {
|
||||||
|
assert(n >= 0);
|
||||||
|
pkt.ConstantBody.ReadLength[n] = stage_state->push_const_size;
|
||||||
|
pkt.ConstantBody.Buffer[n] =
|
||||||
|
render_ro_bo(stage_state->push_const_bo,
|
||||||
|
stage_state->push_const_offset);
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
|
pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
|
||||||
pkt.ConstantBody.Buffer[0].offset =
|
pkt.ConstantBody.Buffer[0].offset =
|
||||||
@@ -3596,7 +3650,8 @@ genX(upload_ps)(struct brw_context *brw)
|
|||||||
ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
|
ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (prog_data->base.nr_params > 0)
|
if (prog_data->base.nr_params > 0 ||
|
||||||
|
prog_data->base.ubo_ranges[0].length > 0)
|
||||||
ps.PushConstantEnable = true;
|
ps.PushConstantEnable = true;
|
||||||
|
|
||||||
#if GEN_GEN < 8
|
#if GEN_GEN < 8
|
||||||
|
Reference in New Issue
Block a user