anv: Also cache the struct anv_pipeline_binding maps
This is state that we generate when compiling the shaders, and we need it for mapping resources from descriptor sets to binding table indices.
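For orientation, this is the shape of the state being cached, reconstructed from the diff below; the exact field types are an assumption for illustration, not a copy of the driver's declarations:

/* Sketch only: a binding table slot maps back to (set, offset) within
 * the pipeline layout's descriptor sets. */
struct anv_pipeline_binding {
   uint32_t set;      /* which descriptor set the slot comes from */
   uint32_t offset;   /* descriptor offset within that set */
};

struct anv_pipeline_bind_map {
   uint32_t surface_count;
   uint32_t sampler_count;
   uint32_t image_count;

   struct anv_pipeline_binding *surface_to_descriptor;
   struct anv_pipeline_binding *sampler_to_descriptor;
};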
@@ -37,7 +37,8 @@ void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
                                    struct brw_stage_prog_data *prog_data);
 void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
                                    nir_shader *shader,
-                                   struct brw_stage_prog_data *prog_data);
+                                   struct brw_stage_prog_data *prog_data,
+                                   struct anv_pipeline_bind_map *map);
 
 #ifdef __cplusplus
 }

@@ -253,7 +253,8 @@ setup_vec4_uniform_value(const union gl_constant_value **params,
 void
 anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
                               nir_shader *shader,
-                              struct brw_stage_prog_data *prog_data)
+                              struct brw_stage_prog_data *prog_data,
+                              struct anv_pipeline_bind_map *map)
 {
    struct anv_pipeline_layout *layout = pipeline->layout;
 

@@ -277,12 +278,6 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
       nir_foreach_block(function->impl, get_used_bindings_block, &state);
    }
 
-   struct anv_pipeline_bind_map map = {
-      .surface_count = 0,
-      .sampler_count = 0,
-      .image_count = 0,
-   };
-
    for (uint32_t set = 0; set < layout->num_sets; set++) {
       struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
 

@@ -290,21 +285,14 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
       BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
                          set_layout->binding_count) {
          if (set_layout->binding[b].stage[shader->stage].surface_index >= 0)
-            map.surface_count += set_layout->binding[b].array_size;
+            map->surface_count += set_layout->binding[b].array_size;
          if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0)
-            map.sampler_count += set_layout->binding[b].array_size;
+            map->sampler_count += set_layout->binding[b].array_size;
          if (set_layout->binding[b].stage[shader->stage].image_index >= 0)
-            map.image_count += set_layout->binding[b].array_size;
+            map->image_count += set_layout->binding[b].array_size;
       }
    }
 
-   map.surface_to_descriptor =
-      malloc(map.surface_count * sizeof(struct anv_pipeline_binding));
-   map.sampler_to_descriptor =
-      malloc(map.sampler_count * sizeof(struct anv_pipeline_binding));
-
-   pipeline->bindings[shader->stage] = map;
-
    unsigned surface = 0;
    unsigned sampler = 0;
    unsigned image = 0;

@@ -320,8 +308,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
       if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) {
          state.set[set].surface_offsets[b] = surface;
          for (unsigned i = 0; i < array_size; i++) {
-            map.surface_to_descriptor[surface + i].set = set;
-            map.surface_to_descriptor[surface + i].offset = set_offset + i;
+            map->surface_to_descriptor[surface + i].set = set;
+            map->surface_to_descriptor[surface + i].offset = set_offset + i;
          }
          surface += array_size;
       }

@@ -329,8 +317,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
       if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) {
          state.set[set].sampler_offsets[b] = sampler;
          for (unsigned i = 0; i < array_size; i++) {
-            map.sampler_to_descriptor[sampler + i].set = set;
-            map.sampler_to_descriptor[sampler + i].offset = set_offset + i;
+            map->sampler_to_descriptor[sampler + i].set = set;
+            map->sampler_to_descriptor[sampler + i].offset = set_offset + i;
          }
          sampler += array_size;
       }

@@ -351,8 +339,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
       }
    }
 
-   if (map.image_count > 0) {
-      assert(map.image_count <= MAX_IMAGES);
+   if (map->image_count > 0) {
+      assert(map->image_count <= MAX_IMAGES);
       nir_foreach_variable(var, &shader->uniforms) {
          if (glsl_type_is_image(var->type) ||
              (glsl_type_is_array(var->type) &&

@@ -374,7 +362,7 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
       const gl_constant_value **param =
          prog_data->param + (shader->num_uniforms / 4);
       const struct brw_image_param *image_param = null_data->images;
-      for (uint32_t i = 0; i < map.image_count; i++) {
+      for (uint32_t i = 0; i < map->image_count; i++) {
          setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
             (const union gl_constant_value *)&image_param->surface_idx, 1);
          setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,

@@ -392,7 +380,7 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
          image_param ++;
       }
 
-      shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4;
+      shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
    }
 
    ralloc_free(mem_ctx);

@@ -193,11 +193,6 @@ void anv_DestroyPipeline(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
 
-   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
-      free(pipeline->bindings[s].surface_to_descriptor);
-      free(pipeline->bindings[s].sampler_to_descriptor);
-   }
-
    anv_reloc_list_finish(&pipeline->batch_relocs,
                          pAllocator ? pAllocator : &device->alloc);
    if (pipeline->blend_state.map)

@@ -315,7 +310,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
                      const char *entrypoint,
                      gl_shader_stage stage,
                      const VkSpecializationInfo *spec_info,
-                     struct brw_stage_prog_data *prog_data)
+                     struct brw_stage_prog_data *prog_data,
+                     struct anv_pipeline_bind_map *map)
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;

@@ -368,7 +364,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
 
    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
    if (pipeline->layout)
-      anv_nir_apply_pipeline_layout(pipeline, nir, prog_data);
+      anv_nir_apply_pipeline_layout(pipeline, nir, prog_data, map);
 
    /* All binding table offsets provided by apply_pipeline_layout() are
     * relative to the start of the bindint table (plus MAX_RTS for VS).

@@ -406,7 +402,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
 static void
 anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
                                 gl_shader_stage stage,
-                                const struct brw_stage_prog_data *prog_data)
+                                const struct brw_stage_prog_data *prog_data,
+                                struct anv_pipeline_bind_map *map)
 {
    struct brw_device_info *devinfo = &pipeline->device->info;
    uint32_t max_threads[] = {

@@ -424,6 +421,7 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
    pipeline->total_scratch =
       align_u32(pipeline->total_scratch, 1024) +
       prog_data->total_scratch * max_threads[stage];
+   pipeline->bindings[stage] = *map;
 }
 
 static VkResult

@@ -437,6 +435,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
    const struct brw_stage_prog_data *stage_prog_data;
+   struct anv_pipeline_bind_map map;
    struct brw_vs_prog_key key;
    uint32_t kernel = NO_KERNEL;
    unsigned char sha1[20];

@@ -445,15 +444,22 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
 
    if (module->size > 0) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data);
+      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
    }
 
    if (kernel == NO_KERNEL) {
       struct brw_vs_prog_data prog_data = { 0, };
+      struct anv_pipeline_binding surface_to_descriptor[256];
+      struct anv_pipeline_binding sampler_to_descriptor[256];
+
+      map = (struct anv_pipeline_bind_map) {
+         .surface_to_descriptor = surface_to_descriptor,
+         .sampler_to_descriptor = sampler_to_descriptor
+      };
 
       nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                              MESA_SHADER_VERTEX, spec_info,
-                                             &prog_data.base.base);
+                                             &prog_data.base.base, &map);
       if (nir == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 

@@ -484,8 +490,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
       kernel = anv_pipeline_cache_upload_kernel(cache,
                                                 module->size > 0 ? sha1 : NULL,
                                                 shader_code, code_size,
-                                                &stage_prog_data,
-                                                sizeof(prog_data));
+                                                &stage_prog_data, sizeof(prog_data),
+                                                &map);
       ralloc_free(mem_ctx);
    }
 

@@ -501,7 +507,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
    }
 
    anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX,
-                                   stage_prog_data);
+                                   stage_prog_data, &map);
 
    return VK_SUCCESS;
 }

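The hunks above settle an ownership question worth spelling out: the map is now filled through stack-allocated scratch arrays, anv_pipeline_cache_upload_kernel() copies the tables into the cache's program stream and repoints the map at those copies, and only then does anv_pipeline_add_compiled_stage() store the map in pipeline->bindings[stage]. That is why the anv_DestroyPipeline() hunk earlier drops the free() calls. A standalone toy of the copy-and-repoint pattern (names echo the driver, but none of this is driver code):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct binding { uint32_t set, offset; };

struct bind_map {
   uint32_t count;
   struct binding *bindings;
};

/* Stands in for anv_pipeline_cache_upload_kernel(): copy the table into
 * storage the cache owns, then repoint the map at that copy. */
static struct binding *
upload(struct bind_map *map)
{
   size_t bytes = map->count * sizeof(map->bindings[0]);
   struct binding *copy = malloc(bytes);
   if (!copy)
      abort();
   memcpy(copy, map->bindings, bytes);
   map->bindings = copy;   /* the caller's scratch array is no longer referenced */
   return copy;
}

int
main(void)
{
   struct binding scratch[256];   /* like surface_to_descriptor[256] above */
   struct bind_map map = { .count = 1, .bindings = scratch };
   scratch[0] = (struct binding) { .set = 0, .offset = 3 };

   struct binding *cache_storage = upload(&map);
   /* map may now outlive this frame, as pipeline->bindings[stage] = *map
    * does in the driver; the cache, not the pipeline, owns the storage. */
   free(cache_storage);
   return 0;
}
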
@@ -517,6 +523,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
    const struct brw_stage_prog_data *stage_prog_data;
+   struct anv_pipeline_bind_map map;
    struct brw_gs_prog_key key;
    uint32_t kernel = NO_KERNEL;
    unsigned char sha1[20];

@@ -525,15 +532,22 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
 
    if (module->size > 0) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data);
+      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
    }
 
    if (kernel == NO_KERNEL) {
       struct brw_gs_prog_data prog_data = { 0, };
+      struct anv_pipeline_binding surface_to_descriptor[256];
+      struct anv_pipeline_binding sampler_to_descriptor[256];
+
+      map = (struct anv_pipeline_bind_map) {
+         .surface_to_descriptor = surface_to_descriptor,
+         .sampler_to_descriptor = sampler_to_descriptor
+      };
 
       nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                              MESA_SHADER_GEOMETRY, spec_info,
-                                             &prog_data.base.base);
+                                             &prog_data.base.base, &map);
       if (nir == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 

@@ -564,7 +578,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
       kernel = anv_pipeline_cache_upload_kernel(cache,
                                                 module->size > 0 ? sha1 : NULL,
                                                 shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data));
+                                                &stage_prog_data, sizeof(prog_data),
+                                                &map);
 
       ralloc_free(mem_ctx);
    }

@@ -572,7 +587,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
    pipeline->gs_kernel = kernel;
 
    anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY,
-                                   stage_prog_data);
+                                   stage_prog_data, &map);
 
    return VK_SUCCESS;
 }

@@ -589,6 +604,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
    const struct brw_stage_prog_data *stage_prog_data;
+   struct anv_pipeline_bind_map map;
    struct brw_wm_prog_key key;
    uint32_t kernel = NO_KERNEL;
    unsigned char sha1[20];

@@ -600,17 +616,22 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
 
    if (module->size > 0) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data);
+      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
    }
 
    if (kernel == NO_KERNEL) {
       struct brw_wm_prog_data prog_data = { 0, };
+      struct anv_pipeline_binding surface_to_descriptor[256];
+      struct anv_pipeline_binding sampler_to_descriptor[256];
 
-      prog_data.binding_table.render_target_start = 0;
+      map = (struct anv_pipeline_bind_map) {
+         .surface_to_descriptor = surface_to_descriptor,
+         .sampler_to_descriptor = sampler_to_descriptor
+      };
 
       nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                              MESA_SHADER_FRAGMENT, spec_info,
-                                             &prog_data.base);
+                                             &prog_data.base, &map);
       if (nir == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 

@@ -645,7 +666,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
       kernel = anv_pipeline_cache_upload_kernel(cache,
                                                 module->size > 0 ? sha1 : NULL,
                                                 shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data));
+                                                &stage_prog_data, sizeof(prog_data),
+                                                &map);
 
       ralloc_free(mem_ctx);
    }

@@ -679,7 +701,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
    }
 
    anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
-                                   stage_prog_data);
+                                   stage_prog_data, &map);
 
    return VK_SUCCESS;
 }

@@ -695,6 +717,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
    const struct brw_stage_prog_data *stage_prog_data;
+   struct anv_pipeline_bind_map map;
    struct brw_cs_prog_key key;
    uint32_t kernel = NO_KERNEL;
    unsigned char sha1[20];

@@ -703,17 +726,22 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
 
    if (module->size > 0) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data);
+      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
    }
 
    if (module->size == 0 || kernel == NO_KERNEL) {
       struct brw_cs_prog_data prog_data = { 0, };
+      struct anv_pipeline_binding surface_to_descriptor[256];
+      struct anv_pipeline_binding sampler_to_descriptor[256];
 
-      prog_data.binding_table.work_groups_start = 0;
+      map = (struct anv_pipeline_bind_map) {
+         .surface_to_descriptor = surface_to_descriptor,
+         .sampler_to_descriptor = sampler_to_descriptor
+      };
 
       nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                              MESA_SHADER_COMPUTE, spec_info,
-                                             &prog_data.base);
+                                             &prog_data.base, &map);
       if (nir == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 

@@ -737,14 +765,16 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
       kernel = anv_pipeline_cache_upload_kernel(cache,
                                                 module->size > 0 ? sha1 : NULL,
                                                 shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data));
+                                                &stage_prog_data, sizeof(prog_data),
+                                                &map);
+
       ralloc_free(mem_ctx);
    }
 
    pipeline->cs_simd = kernel;
 
    anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE,
-                                   stage_prog_data);
+                                   stage_prog_data, &map);
 
    return VK_SUCCESS;
 }

@@ -72,6 +72,10 @@ struct cache_entry {
    unsigned char sha1[20];
    uint32_t prog_data_size;
    uint32_t kernel_size;
+   uint32_t surface_count;
+   uint32_t sampler_count;
+   uint32_t image_count;
+
    char prog_data[0];
 
    /* kernel follows prog_data at next 64 byte aligned address */

@@ -84,7 +88,11 @@ entry_size(struct cache_entry *entry)
     * doesn't include the alignment padding bytes.
     */
 
-   return sizeof(*entry) + entry->prog_data_size + entry->kernel_size;
+   const uint32_t map_size =
+      entry->surface_count * sizeof(struct anv_pipeline_binding) +
+      entry->sampler_count * sizeof(struct anv_pipeline_binding);
+
+   return sizeof(*entry) + entry->prog_data_size + map_size;
 }
 
 void

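Taken together with the struct cache_entry hunk above, the new serialized layout is: the fixed header, then prog_data, then the surface bindings, then the sampler bindings, with the kernel starting at the next 64-byte-aligned address. Here is a self-contained model of that accounting; the _model types are assumptions for illustration, and the real declarations live in the driver:

#include <stdint.h>

struct anv_pipeline_binding_model { uint32_t set, offset; };

struct cache_entry_model {
   unsigned char sha1[20];
   uint32_t prog_data_size;
   uint32_t kernel_size;
   uint32_t surface_count;
   uint32_t sampler_count;
   uint32_t image_count;
   char prog_data[];   /* surface bindings, then sampler bindings, follow;
                        * the kernel starts at the next 64-byte boundary */
};

/* Mirrors the new entry_size(): header + prog_data + both binding tables.
 * The kernel is deliberately excluded; callers that need the full footprint
 * add entry->kernel_size, as the cache->total_size hunk below does. */
static uint32_t
model_entry_size(const struct cache_entry_model *entry)
{
   const uint32_t map_size =
      (entry->surface_count + entry->sampler_count) *
      (uint32_t) sizeof(struct anv_pipeline_binding_model);

   return (uint32_t) sizeof(*entry) + entry->prog_data_size + map_size;
}
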
@@ -111,7 +119,8 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
 uint32_t
 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                           const unsigned char *sha1,
-                          const struct brw_stage_prog_data **prog_data)
+                          const struct brw_stage_prog_data **prog_data,
+                          struct anv_pipeline_bind_map *map)
 {
    const uint32_t mask = cache->table_size - 1;
    const uint32_t start = (*(uint32_t *) sha1);

@@ -126,13 +135,20 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
       struct cache_entry *entry =
          cache->program_stream.block_pool->map + offset;
       if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
-         if (prog_data)
-            *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
+         if (prog_data) {
+            assert(map);
+            void *p = entry->prog_data;
+            *prog_data = p;
+            p += entry->prog_data_size;
+            map->surface_count = entry->surface_count;
+            map->sampler_count = entry->sampler_count;
+            map->image_count = entry->image_count;
+            map->surface_to_descriptor = p;
+            p += map->surface_count * sizeof(struct anv_pipeline_binding);
+            map->sampler_to_descriptor = p;
+         }
 
-         const uint32_t preamble_size =
-            align_u32(sizeof(*entry) + entry->prog_data_size, 64);
-
-         return offset + preamble_size;
+         return offset + align_u32(entry_size(entry), 64);
       }
    }
 

@@ -157,7 +173,7 @@ anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
       }
    }
 
-   cache->total_size += entry_size(entry);
+   cache->total_size += entry_size(entry) + entry->kernel_size;
    cache->kernel_count++;
 }
 

@@ -214,13 +230,18 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                  const unsigned char *sha1,
                                  const void *kernel, size_t kernel_size,
                                  const struct brw_stage_prog_data **prog_data,
-                                 size_t prog_data_size)
+                                 size_t prog_data_size,
+                                 struct anv_pipeline_bind_map *map)
 {
    pthread_mutex_lock(&cache->mutex);
    struct cache_entry *entry;
 
+   const uint32_t map_size =
+      map->surface_count * sizeof(struct anv_pipeline_binding) +
+      map->sampler_count * sizeof(struct anv_pipeline_binding);
+
    const uint32_t preamble_size =
-      align_u32(sizeof(*entry) + prog_data_size, 64);
+      align_u32(sizeof(*entry) + prog_data_size + map_size, 64);
 
    const uint32_t size = preamble_size + kernel_size;
 

@@ -230,12 +251,26 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
 
    entry = state.map;
    entry->prog_data_size = prog_data_size;
-   memcpy(entry->prog_data, *prog_data, prog_data_size);
-   *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
+   entry->surface_count = map->surface_count;
+   entry->sampler_count = map->sampler_count;
+   entry->image_count = map->image_count;
    entry->kernel_size = kernel_size;
 
+   void *p = entry->prog_data;
+   memcpy(p, *prog_data, prog_data_size);
+   p += prog_data_size;
+
+   memcpy(p, map->surface_to_descriptor,
+          map->surface_count * sizeof(struct anv_pipeline_binding));
+   map->surface_to_descriptor = p;
+   p += map->surface_count * sizeof(struct anv_pipeline_binding);
+
+   memcpy(p, map->sampler_to_descriptor,
+          map->sampler_count * sizeof(struct anv_pipeline_binding));
+   map->sampler_to_descriptor = p;
+
    if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) {
-      assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL);
+      assert(anv_pipeline_cache_search(cache, sha1, NULL, NULL) == NO_KERNEL);
 
       memcpy(entry->sha1, sha1, sizeof(entry->sha1));
       anv_pipeline_cache_add_entry(cache, entry, state.offset);

@@ -248,6 +283,8 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
    if (!cache->device->info.has_llc)
       anv_state_clflush(state);
 
+   *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
+
    return state.offset + preamble_size;
 }
 

@@ -282,23 +319,34 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
    if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
       return;
 
-   const void *end = data + size;
-   const void *p = data + header.header_size;
+   void *end = (void *) data + size;
+   void *p = (void *) data + header.header_size;
 
    while (p < end) {
-      /* The kernels aren't 64 byte aligned in the serialized format so
-       * they're always right after the prog_data.
-       */
-      const struct cache_entry *entry = p;
-      const void *kernel = &entry->prog_data[entry->prog_data_size];
+      struct cache_entry *entry = p;
 
-      const struct brw_stage_prog_data *prog_data =
-         (const struct brw_stage_prog_data *) entry->prog_data;
+      void *data = entry->prog_data;
+      const struct brw_stage_prog_data *prog_data = data;
+      data += entry->prog_data_size;
+
+      struct anv_pipeline_binding *surface_to_descriptor = data;
+      data += entry->surface_count * sizeof(struct anv_pipeline_binding);
+      struct anv_pipeline_binding *sampler_to_descriptor = data;
+      data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
+      void *kernel = data;
+
+      struct anv_pipeline_bind_map map = {
+         .surface_count = entry->surface_count,
+         .sampler_count = entry->sampler_count,
+         .image_count = entry->image_count,
+         .surface_to_descriptor = surface_to_descriptor,
+         .sampler_to_descriptor = sampler_to_descriptor
+      };
 
       anv_pipeline_cache_upload_kernel(cache, entry->sha1,
                                        kernel, entry->kernel_size,
                                        &prog_data,
-                                       entry->prog_data_size);
+                                       entry->prog_data_size, &map);
       p = kernel + entry->kernel_size;
    }
 }

@@ -383,14 +431,14 @@ VkResult anv_GetPipelineCacheData(
          continue;
 
       entry = cache->program_stream.block_pool->map + cache->hash_table[i];
-      if (end < p + entry_size(entry))
+      const uint32_t size = entry_size(entry);
+      if (end < p + size + entry->kernel_size)
          break;
 
-      memcpy(p, entry, sizeof(*entry) + entry->prog_data_size);
-      p += sizeof(*entry) + entry->prog_data_size;
+      memcpy(p, entry, size);
+      p += size;
 
-      void *kernel = (void *) entry +
-         align_u32(sizeof(*entry) + entry->prog_data_size, 64);
+      void *kernel = (void *) entry + align_u32(size, 64);
 
       memcpy(p, kernel, entry->kernel_size);
       p += entry->kernel_size;

@@ -413,7 +461,7 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
       struct cache_entry *entry =
          src->program_stream.block_pool->map + offset;
 
-      if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL)
+      if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
          continue;
 
       anv_pipeline_cache_add_entry(dst, entry, offset);

@@ -636,18 +636,22 @@ struct anv_pipeline_cache {
    uint32_t *                                   hash_table;
 };
 
+struct anv_pipeline_bind_map;
+
 void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                              struct anv_device *device);
 void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
 uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                                    const unsigned char *sha1,
-                                   const struct brw_stage_prog_data **prog_data);
+                                   const struct brw_stage_prog_data **prog_data,
+                                   struct anv_pipeline_bind_map *map);
 uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                           const unsigned char *sha1,
                                           const void *kernel,
                                           size_t kernel_size,
                                           const struct brw_stage_prog_data **prog_data,
-                                          size_t prog_data_size);
+                                          size_t prog_data_size,
+                                          struct anv_pipeline_bind_map *map);
 
 struct anv_device {
    VK_LOADER_DATA                               _loader_data;