lavapipe: implement EXT_inline_uniform_block

this is a lot of machinery to propagate the block sizes down from the
descriptor layout to the pipeline layout to the rendering_state
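
roughly, lvp_CreatePipelineLayout concatenates each set layout's per-stage
block sizes and counts into the pipeline layout; a condensed sketch of that
accumulation (not a literal excerpt — see the lvp_CreatePipelineLayout hunk
below):

    for (unsigned set = 0; set < pCreateInfo->setLayoutCount; set++) {
       const struct lvp_descriptor_set_layout *sl = layout->set[set].layout;
       for (unsigned st = 0; st < MESA_SHADER_STAGES; st++) {
          /* total inline block bytes this set contributes to the stage */
          layout->stage[st].uniform_block_size += sl->stage[st].uniform_block_size;
          /* append this set's per-block sizes after those already gathered */
          for (unsigned j = 0; j < sl->stage[st].uniform_block_count; j++)
             layout->stage[st].uniform_block_sizes[layout->stage[st].uniform_block_count + j] =
                sl->stage[st].uniform_block_sizes[j];
          layout->stage[st].uniform_block_count += sl->stage[st].uniform_block_count;
       }
    }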

block data is appended to ubo0 immediately following push constant
data (if it exists), which requires that a new buffer be created and
filled any time either type of data changes
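
the per-stage ubo0 therefore ends up laid out as
[ push constants | inline block 0 | inline block 1 | ... ]; a rough sketch of
the fill, mirroring the fill_ubo0() helper added below:

    unsigned push_size = get_pcbuf_size(state, pstage);
    if (push_size)
       memcpy(mem, state->push_constants, push_size);
    mem += push_size;
    for (unsigned i = 0; i < state->uniform_blocks[pstage].count; i++) {
       /* each bound inline block is copied back to back after the push data */
       memcpy(mem, state->uniform_blocks[pstage].block[i],
              state->uniform_blocks[pstage].size[i]);
       mem += state->uniform_blocks[pstage].size[i];
    }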

shader handling is done by propagating the offset of each block relative
to the start of its descriptor set, then accumulating the sizes of
every uniform block in each preceding descriptor set into the offset,
then adding on the push constant size, and finally adding that on to
the existing load_ubo deref offset
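
in other words, the constant folded into each load_ubo offset is computed
like this (illustrative helper only; the real code does it inline in
lower_block_instr below):

    unsigned block_ubo0_offset(const struct lvp_pipeline_layout *layout,
                               gl_shader_stage stage, unsigned desc_set,
                               const struct lvp_descriptor_set_binding_layout *binding)
    {
       unsigned offset = 0;
       /* inline block bytes from every preceding descriptor set */
       for (unsigned s = 0; s < desc_set; s++)
          offset += layout->set[s].layout->stage[stage].uniform_block_size;
       /* push constants occupy the start of ubo0 for stages that have them */
       if (layout->push_constant_stages & BITFIELD_BIT(stage))
          offset += layout->push_constant_size;
       /* this block's offset relative to the start of its own set */
       offset += binding->stage[stage].uniform_block_offset;
       return offset; /* added onto the existing load_ubo deref offset */
    }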

update-after-bind is no longer an issue since each instance of pc+block
data is its own immutable buffer that can never be modified

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15457>
Author: Mike Blumenkrantz
Date: 2022-03-18 09:56:31 -04:00
Committed by: Marge Bot
Parent: 249fe9673a
Commit: 1ba1ee9e7c
4 changed files with 159 additions and 14 deletions


@@ -98,7 +98,10 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout(
set_layout->binding[b].descriptor_index = set_layout->size;
set_layout->binding[b].type = binding->descriptorType;
set_layout->binding[b].valid = true;
- set_layout->size += binding->descriptorCount;
+ if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
+    set_layout->size++;
+ else
+    set_layout->size += binding->descriptorCount;
for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES; stage++) {
set_layout->binding[b].stage[stage].const_buffer_index = -1;
@@ -106,6 +109,7 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout(
set_layout->binding[b].stage[stage].sampler_index = -1;
set_layout->binding[b].stage[stage].sampler_view_index = -1;
set_layout->binding[b].stage[stage].image_index = -1;
set_layout->binding[b].stage[stage].uniform_block_index = -1;
}
if (binding->descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
@@ -141,6 +145,14 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDescriptorSetLayout(
set_layout->stage[s].const_buffer_count += binding->descriptorCount;
}
break;
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
lvp_foreach_stage(s, binding->stageFlags) {
set_layout->binding[b].stage[s].uniform_block_offset = set_layout->stage[s].uniform_block_size;
set_layout->binding[b].stage[s].uniform_block_index = set_layout->stage[s].uniform_block_count;
set_layout->stage[s].uniform_block_size += binding->descriptorCount;
set_layout->stage[s].uniform_block_sizes[set_layout->stage[s].uniform_block_count++] = binding->descriptorCount;
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
lvp_foreach_stage(s, binding->stageFlags) {
@@ -260,6 +272,14 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreatePipelineLayout(
LVP_FROM_HANDLE(lvp_descriptor_set_layout, set_layout,
pCreateInfo->pSetLayouts[set]);
layout->set[set].layout = set_layout;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
layout->stage[i].uniform_block_size += set_layout->stage[i].uniform_block_size;
for (unsigned j = 0; j < set_layout->stage[i].uniform_block_count; j++) {
assert(layout->stage[i].uniform_block_count + j < MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS);
layout->stage[i].uniform_block_sizes[layout->stage[i].uniform_block_count + j] = set_layout->stage[i].uniform_block_sizes[j];
}
layout->stage[i].uniform_block_count += set_layout->stage[i].uniform_block_count;
}
lvp_descriptor_set_layout_ref(set_layout);
}
@@ -341,8 +361,10 @@ lvp_descriptor_set_create(struct lvp_device *device,
struct lvp_descriptor_set **out_set)
{
struct lvp_descriptor_set *set;
- size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]);
+ size_t base_size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]);
+ size_t size = base_size;
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++)
+    size += layout->stage[i].uniform_block_size;
set = vk_alloc(&device->vk.alloc /* XXX: Use the pool */, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!set)
@@ -360,12 +382,19 @@ lvp_descriptor_set_create(struct lvp_device *device,
/* Go through and fill out immutable samplers if we have any */
struct lvp_descriptor *desc = set->descriptors;
+ uint8_t *uniform_mem = (uint8_t*)(set) + base_size;
for (uint32_t b = 0; b < layout->binding_count; b++) {
- if (layout->binding[b].immutable_samplers) {
-    for (uint32_t i = 0; i < layout->binding[b].array_size; i++)
-       desc[i].info.sampler = layout->binding[b].immutable_samplers[i];
+ if (layout->binding[b].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
+    desc->info.uniform = uniform_mem;
+    uniform_mem += layout->binding[b].array_size;
+    desc++;
+ } else {
+    if (layout->binding[b].immutable_samplers) {
+       for (uint32_t i = 0; i < layout->binding[b].array_size; i++)
+          desc[i].info.sampler = layout->binding[b].immutable_samplers[i];
+    }
+    desc += layout->binding[b].array_size;
+ }
- desc += layout->binding[b].array_size;
}
*out_set = set;
@@ -444,6 +473,14 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets(
&set->layout->binding[write->dstBinding];
struct lvp_descriptor *desc =
&set->descriptors[bind_layout->descriptor_index];
if (write->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
const VkWriteDescriptorSetInlineUniformBlock *uniform_data =
vk_find_struct_const(write->pNext, WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK);
assert(uniform_data);
desc->type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK;
memcpy(desc->info.uniform + write->dstArrayElement, uniform_data->pData, uniform_data->dataSize);
continue;
}
desc += write->dstArrayElement;
switch (write->descriptorType) {
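
for context, an application updates an inline uniform block with
dstArrayElement/descriptorCount expressed in bytes, which is why the memcpy
above can use them directly; a rough app-side sketch (handles and sizes are
arbitrary placeholders):

    uint8_t data[64] = {0};
    VkWriteDescriptorSetInlineUniformBlock inline_write = {
       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK,
       .dataSize = sizeof(data),           /* byte count to write */
       .pData = data,
    };
    VkWriteDescriptorSet write = {
       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
       .pNext = &inline_write,
       .dstSet = set,                      /* assumed VkDescriptorSet handle */
       .dstBinding = 0,
       .dstArrayElement = 0,               /* byte offset into the block */
       .descriptorCount = sizeof(data),    /* byte count, equals dataSize */
       .descriptorType = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK,
    };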
@@ -540,16 +577,24 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets(
&src->layout->binding[copy->srcBinding];
struct lvp_descriptor *src_desc =
&src->descriptors[src_layout->descriptor_index];
- src_desc += copy->srcArrayElement;
const struct lvp_descriptor_set_binding_layout *dst_layout =
&dst->layout->binding[copy->dstBinding];
struct lvp_descriptor *dst_desc =
&dst->descriptors[dst_layout->descriptor_index];
- dst_desc += copy->dstArrayElement;
- for (uint32_t j = 0; j < copy->descriptorCount; j++)
-    dst_desc[j] = src_desc[j];
+ if (src_desc->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
+    dst_desc->type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK;
+    memcpy(dst_desc->info.uniform + copy->dstArrayElement,
+           src_desc->info.uniform + copy->srcArrayElement,
+           copy->descriptorCount);
+ } else {
+    src_desc += copy->srcArrayElement;
+    dst_desc += copy->dstArrayElement;
+    for (uint32_t j = 0; j < copy->descriptorCount; j++)
+       dst_desc[j] = src_desc[j];
+ }
}
}
@@ -689,6 +734,11 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSetWithTemplate(VkDevice _device,
&set->layout->binding[entry->dstBinding];
struct lvp_descriptor *desc =
&set->descriptors[bind_layout->descriptor_index];
if (entry->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
desc->type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT;
memcpy(desc->info.uniform + entry->dstArrayElement, pSrc, entry->descriptorCount);
continue;
}
for (j = 0; j < entry->descriptorCount; ++j) {
unsigned idx = j + entry->dstArrayElement;
switch (entry->descriptorType) {


@@ -146,6 +146,11 @@ struct rendering_state {
uint8_t push_constants[128 * 4];
uint16_t push_size[2]; //gfx, compute
struct {
void *block[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS];
uint16_t size[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS];
uint16_t count;
} uniform_blocks[PIPE_SHADER_TYPES];
const struct lvp_render_pass *pass;
struct lvp_subpass *subpass;
@@ -208,6 +213,8 @@ static unsigned
calc_ubo0_size(struct rendering_state *state, enum pipe_shader_type pstage)
{
unsigned size = get_pcbuf_size(state, pstage);
for (unsigned i = 0; i < state->uniform_blocks[pstage].count; i++)
size += state->uniform_blocks[pstage].size[i];
return size;
}
@@ -217,6 +224,13 @@ fill_ubo0(struct rendering_state *state, uint8_t *mem, enum pipe_shader_type pst
unsigned push_size = get_pcbuf_size(state, pstage);
if (push_size)
memcpy(mem, state->push_constants, push_size);
mem += push_size;
for (unsigned i = 0; i < state->uniform_blocks[pstage].count; i++) {
unsigned size = state->uniform_blocks[pstage].size[i];
memcpy(mem, state->uniform_blocks[pstage].block[i], size);
mem += size;
}
}
static void
@@ -418,7 +432,10 @@ static void handle_compute_pipeline(struct vk_cmd_queue_entry *cmd,
if ((pipeline->layout->push_constant_stages & VK_SHADER_STAGE_COMPUTE_BIT) > 0)
state->has_pcbuf[PIPE_SHADER_COMPUTE] = pipeline->layout->push_constant_size > 0;
- if (!state->has_pcbuf[PIPE_SHADER_COMPUTE])
+ state->uniform_blocks[PIPE_SHADER_COMPUTE].count = pipeline->layout->stage[MESA_SHADER_COMPUTE].uniform_block_count;
+ for (unsigned j = 0; j < pipeline->layout->stage[MESA_SHADER_COMPUTE].uniform_block_count; j++)
+    state->uniform_blocks[PIPE_SHADER_COMPUTE].size[j] = pipeline->layout->stage[MESA_SHADER_COMPUTE].uniform_block_sizes[j];
+ if (!state->has_pcbuf[PIPE_SHADER_COMPUTE] && !pipeline->layout->stage[MESA_SHADER_COMPUTE].uniform_block_count)
state->pcbuf_dirty[PIPE_SHADER_COMPUTE] = false;
state->dispatch_info.block[0] = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.workgroup_size[0];
@@ -547,10 +564,16 @@ static void handle_graphics_pipeline(struct vk_cmd_queue_entry *cmd,
for (enum pipe_shader_type sh = PIPE_SHADER_VERTEX; sh < PIPE_SHADER_COMPUTE; sh++)
state->has_pcbuf[sh] = false;
for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
enum pipe_shader_type sh = pipe_shader_type_from_mesa(i);
state->uniform_blocks[sh].count = pipeline->layout->stage[i].uniform_block_count;
for (unsigned j = 0; j < pipeline->layout->stage[i].uniform_block_count; j++)
state->uniform_blocks[sh].size[j] = pipeline->layout->stage[i].uniform_block_sizes[j];
}
u_foreach_bit(stage, pipeline->layout->push_constant_stages) {
enum pipe_shader_type sh = pipe_shader_type_from_mesa(stage);
state->has_pcbuf[sh] = pipeline->layout->push_constant_size > 0;
- if (!state->has_pcbuf[sh])
+ if (!state->has_pcbuf[sh] && !state->uniform_blocks[sh].count)
state->pcbuf_dirty[sh] = false;
}
@@ -992,6 +1015,7 @@ struct dyn_info {
uint16_t sampler_count;
uint16_t sampler_view_count;
uint16_t image_count;
uint16_t uniform_block_count;
} stage[MESA_SHADER_STAGES];
uint32_t dyn_index;
@@ -1230,6 +1254,16 @@ static void handle_descriptor(struct rendering_state *state,
type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
switch (type) {
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
int idx = binding->stage[stage].uniform_block_index;
if (idx == -1)
return;
idx += dyn_info->stage[stage].uniform_block_count;
assert(descriptor->uniform);
state->uniform_blocks[p_stage].block[idx] = descriptor->uniform;
state->pcbuf_dirty[p_stage] = true;
break;
}
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
fill_image_view_stage(state, dyn_info, stage, p_stage, array_idx, descriptor, binding);
@@ -1299,6 +1333,7 @@ static void handle_descriptor(struct rendering_state *state,
break;
default:
fprintf(stderr, "Unhandled descriptor set %d\n", type);
unreachable("oops");
break;
}
}
@@ -1316,7 +1351,8 @@ static void handle_set_stage(struct rendering_state *state,
binding = &set->layout->binding[j];
if (binding->valid) {
- for (int i = 0; i < binding->array_size; i++) {
+ unsigned array_size = binding->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ? 1 : binding->array_size;
+ for (int i = 0; i < array_size; i++) {
descriptor = &set->descriptors[binding->descriptor_index + i];
handle_descriptor(state, dyn_info, binding, stage, p_stage, i, descriptor->type, &descriptor->info);
}
@@ -1333,6 +1369,7 @@ static void increment_dyn_info(struct dyn_info *dyn_info,
dyn_info->stage[stage].sampler_count += layout->stage[stage].sampler_count;
dyn_info->stage[stage].sampler_view_count += layout->stage[stage].sampler_view_count;
dyn_info->stage[stage].image_count += layout->stage[stage].image_count;
dyn_info->stage[stage].uniform_block_count += layout->stage[stage].uniform_block_count;
}
if (inc_dyn)
dyn_info->dyn_index += layout->dynamic_offset_count;


@@ -47,6 +47,48 @@ lower_vulkan_resource_index(const nir_instr *instr, const void *data_cb)
return false;
}
static bool
lower_uniform_block_access(const nir_instr *instr, const void *data_cb)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_load_deref)
return false;
nir_deref_instr *deref = nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
return deref->modes == nir_var_mem_ubo;
}
static nir_ssa_def *
lower_block_instr(nir_builder *b, nir_instr *instr, void *data_cb)
{
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
nir_binding nb = nir_chase_binding(intrin->src[0]);
struct lvp_pipeline_layout *layout = data_cb;
struct lvp_descriptor_set_binding_layout *binding = &layout->set[nb.desc_set].layout->binding[nb.binding];
if (binding->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
return NULL;
if (!binding->array_size)
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
assert(intrin->src[0].ssa->num_components == 2);
unsigned value = 0;
for (unsigned s = 0; s < nb.desc_set; s++)
value += layout->set[s].layout->stage[b->shader->info.stage].uniform_block_size;
if (layout->push_constant_stages & BITFIELD_BIT(b->shader->info.stage))
value += layout->push_constant_size;
value += binding->stage[b->shader->info.stage].uniform_block_offset;
b->cursor = nir_before_instr(instr);
nir_ssa_def *offset = nir_imm_ivec2(b, 0, value);
nir_ssa_def *added = nir_iadd(b, intrin->src[0].ssa, offset);
nir_deref_instr *deref = nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
nir_deref_instr *cast = nir_build_deref_cast(b, added, deref->modes, deref->type, 0);
nir_instr_rewrite_src_ssa(instr, &intrin->src[0], &cast->dest.ssa);
return NIR_LOWER_INSTR_PROGRESS;
}
static nir_ssa_def *lower_vri_intrin_vri(struct nir_builder *b,
nir_instr *instr, void *data_cb)
{
@@ -59,6 +101,10 @@ static nir_ssa_def *lower_vri_intrin_vri(struct nir_builder *b,
bool is_ubo = (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
/* always load inline uniform blocks from ubo0 */
if (binding->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
return nir_imm_ivec2(b, 0, 0);
for (unsigned s = 0; s < desc_set_idx; s++) {
if (is_ubo)
value += layout->set[s].layout->stage[b->shader->info.stage].const_buffer_count;
@@ -209,6 +255,7 @@ void lvp_lower_pipeline_layout(const struct lvp_device *device,
struct lvp_pipeline_layout *layout,
nir_shader *shader)
{
nir_shader_lower_instructions(shader, lower_uniform_block_access, lower_block_instr, layout);
nir_shader_lower_instructions(shader, lower_vulkan_resource_index, lower_vri_instr, layout);
nir_foreach_variable_with_modes(var, shader, nir_var_uniform |
nir_var_image) {


@@ -77,6 +77,8 @@ extern "C" {
#define MAX_SETS 8
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_PUSH_DESCRIPTORS 32
#define MAX_DESCRIPTOR_UNIFORM_BLOCK_SIZE 4096
#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS 8
#ifdef _WIN32
#define lvp_printflike(a, b)
@@ -336,6 +338,8 @@ struct lvp_descriptor_set_binding_layout {
int16_t sampler_index;
int16_t sampler_view_index;
int16_t image_index;
int16_t uniform_block_index;
int16_t uniform_block_offset;
} stage[MESA_SHADER_STAGES];
/* Immutable samplers (or NULL if no immutable samplers) */
@@ -365,6 +369,9 @@ struct lvp_descriptor_set_layout {
uint16_t sampler_count;
uint16_t sampler_view_count;
uint16_t image_count;
uint16_t uniform_block_count;
uint16_t uniform_block_size;
uint16_t uniform_block_sizes[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS]; //zero-indexed
} stage[MESA_SHADER_STAGES];
/* Number of dynamic offsets used by this descriptor set */
@@ -405,6 +412,7 @@ union lvp_descriptor_info {
VkDeviceSize range;
};
struct lvp_buffer_view *buffer_view;
uint8_t *uniform;
};
struct lvp_descriptor {
@@ -461,6 +469,9 @@ struct lvp_pipeline_layout {
uint32_t push_constant_size;
VkShaderStageFlags push_constant_stages;
struct {
uint16_t uniform_block_size;
uint16_t uniform_block_count;
uint16_t uniform_block_sizes[MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS * MAX_SETS];
} stage[MESA_SHADER_STAGES];
};