anv: Split dispatch tables into device and instance

There's no reason why we need to generate trampoline functions for instance
functions or carry N copies of the instance dispatch table around for
every hardware generation.  Splitting the tables and being more
conservative shaves about 34K off .text and about 4K off .data when
built with clang.

Before splitting dispatch tables:

   text	   data	    bss	    dec	    hex	filename
3224305	 286216	   8960	3519481	 35b3f9	_install/lib64/libvulkan_intel.so

After splitting dispatch tables:

   text	   data	    bss	    dec	    hex	filename
3190325	 282232	   8960	3481517	 351fad	_install/lib64/libvulkan_intel.so

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
This commit is contained in:
Jason Ekstrand
2018-10-13 12:16:14 -05:00
parent 18cc65edf8
commit ae18c53ba6
3 changed files with 232 additions and 93 deletions

View File

@@ -637,14 +637,28 @@ VkResult anv_CreateInstance(
/* Vulkan requires that entrypoints for extensions which have not been
* enabled must not be advertised.
*/
if (!anv_entrypoint_is_enabled(i, instance->app_info.api_version,
&instance->enabled_extensions, NULL)) {
if (!anv_instance_entrypoint_is_enabled(i, instance->app_info.api_version,
&instance->enabled_extensions)) {
instance->dispatch.entrypoints[i] = NULL;
} else if (anv_dispatch_table.entrypoints[i] != NULL) {
instance->dispatch.entrypoints[i] = anv_dispatch_table.entrypoints[i];
} else {
instance->dispatch.entrypoints[i] =
anv_tramp_dispatch_table.entrypoints[i];
anv_instance_dispatch_table.entrypoints[i];
}
}
for (unsigned i = 0; i < ARRAY_SIZE(instance->device_dispatch.entrypoints); i++) {
/* Vulkan requires that entrypoints for extensions which have not been
* enabled must not be advertised.
*/
if (!anv_device_entrypoint_is_enabled(i, instance->app_info.api_version,
&instance->enabled_extensions, NULL)) {
instance->device_dispatch.entrypoints[i] = NULL;
} else if (anv_device_dispatch_table.entrypoints[i] != NULL) {
instance->device_dispatch.entrypoints[i] =
anv_device_dispatch_table.entrypoints[i];
} else {
instance->device_dispatch.entrypoints[i] =
anv_tramp_device_dispatch_table.entrypoints[i];
}
}
@@ -1349,11 +1363,15 @@ PFN_vkVoidFunction anv_GetInstanceProcAddr(
if (instance == NULL)
return NULL;
int idx = anv_get_entrypoint_index(pName);
if (idx < 0)
return NULL;
int idx = anv_get_instance_entrypoint_index(pName);
if (idx >= 0)
return instance->dispatch.entrypoints[idx];
idx = anv_get_device_entrypoint_index(pName);
if (idx >= 0)
return instance->device_dispatch.entrypoints[idx];
return NULL;
}
/* With version 1+ of the loader interface the ICD should expose
@@ -1381,7 +1399,7 @@ PFN_vkVoidFunction anv_GetDeviceProcAddr(
if (!device || !pName)
return NULL;
int idx = anv_get_entrypoint_index(pName);
int idx = anv_get_device_entrypoint_index(pName);
if (idx < 0)
return NULL;
@@ -1531,25 +1549,25 @@ VkResult anv_EnumerateDeviceExtensionProperties(
static void
anv_device_init_dispatch(struct anv_device *device)
{
const struct anv_dispatch_table *genX_table;
const struct anv_device_dispatch_table *genX_table;
switch (device->info.gen) {
case 11:
genX_table = &gen11_dispatch_table;
genX_table = &gen11_device_dispatch_table;
break;
case 10:
genX_table = &gen10_dispatch_table;
genX_table = &gen10_device_dispatch_table;
break;
case 9:
genX_table = &gen9_dispatch_table;
genX_table = &gen9_device_dispatch_table;
break;
case 8:
genX_table = &gen8_dispatch_table;
genX_table = &gen8_device_dispatch_table;
break;
case 7:
if (device->info.is_haswell)
genX_table = &gen75_dispatch_table;
genX_table = &gen75_device_dispatch_table;
else
genX_table = &gen7_dispatch_table;
genX_table = &gen7_device_dispatch_table;
break;
default:
unreachable("unsupported gen\n");
@@ -1559,14 +1577,15 @@ anv_device_init_dispatch(struct anv_device *device)
/* Vulkan requires that entrypoints for extensions which have not been
* enabled must not be advertised.
*/
if (!anv_entrypoint_is_enabled(i, device->instance->app_info.api_version,
if (!anv_device_entrypoint_is_enabled(i, device->instance->app_info.api_version,
&device->instance->enabled_extensions,
&device->enabled_extensions)) {
device->dispatch.entrypoints[i] = NULL;
} else if (genX_table->entrypoints[i]) {
device->dispatch.entrypoints[i] = genX_table->entrypoints[i];
} else {
device->dispatch.entrypoints[i] = anv_dispatch_table.entrypoints[i];
device->dispatch.entrypoints[i] =
anv_device_dispatch_table.entrypoints[i];
}
}
}

View File

@@ -50,11 +50,11 @@ LAYERS = [
TEMPLATE_H = Template("""\
/* This file generated from ${filename}, don't edit directly. */
struct anv_dispatch_table {
struct anv_instance_dispatch_table {
union {
void *entrypoints[${len(entrypoints)}];
void *entrypoints[${len(instance_entrypoints)}];
struct {
% for e in entrypoints:
% for e in instance_entrypoints:
% if e.guard is not None:
#ifdef ${e.guard}
PFN_${e.name} ${e.name};
@@ -69,12 +69,45 @@ struct anv_dispatch_table {
};
};
%for layer in LAYERS:
extern const struct anv_dispatch_table ${layer}_dispatch_table;
struct anv_device_dispatch_table {
union {
void *entrypoints[${len(device_entrypoints)}];
struct {
% for e in device_entrypoints:
% if e.guard is not None:
#ifdef ${e.guard}
PFN_${e.name} ${e.name};
#else
void *${e.name};
# endif
% else:
PFN_${e.name} ${e.name};
% endif
% endfor
extern const struct anv_dispatch_table anv_tramp_dispatch_table;
};
};
};
% for e in entrypoints:
extern const struct anv_instance_dispatch_table anv_instance_dispatch_table;
%for layer in LAYERS:
extern const struct anv_device_dispatch_table ${layer}_device_dispatch_table;
%endfor
extern const struct anv_device_dispatch_table anv_tramp_device_dispatch_table;
% for e in instance_entrypoints:
% if e.alias:
<% continue %>
% endif
% if e.guard is not None:
#ifdef ${e.guard}
% endif
${e.return_type} ${e.prefixed_name('anv')}(${e.decl_params()});
% if e.guard is not None:
#endif // ${e.guard}
% endif
% endfor
% for e in device_entrypoints:
% if e.alias:
<% continue %>
% endif
@@ -129,13 +162,14 @@ struct string_map_entry {
* store the index into this big string.
*/
static const char strings[] =
<%def name="strmap(strmap, prefix)">
static const char ${prefix}_strings[] =
% for s in strmap.sorted_strings:
"${s.string}\\0"
% endfor
;
static const struct string_map_entry string_map_entries[] = {
static const struct string_map_entry ${prefix}_string_map_entries[] = {
% for s in strmap.sorted_strings:
{ ${s.offset}, ${'{:0=#8x}'.format(s.hash)}, ${s.num} }, /* ${s.string} */
% endfor
@@ -150,14 +184,14 @@ static const struct string_map_entry string_map_entries[] = {
*/
#define none 0xffff
static const uint16_t string_map[${strmap.hash_size}] = {
static const uint16_t ${prefix}_string_map[${strmap.hash_size}] = {
% for e in strmap.mapping:
${ '{:0=#6x}'.format(e) if e >= 0 else 'none' },
% endfor
};
static int
string_map_lookup(const char *str)
${prefix}_string_map_lookup(const char *str)
{
static const uint32_t prime_factor = ${strmap.prime_factor};
static const uint32_t prime_step = ${strmap.prime_step};
@@ -172,25 +206,54 @@ string_map_lookup(const char *str)
h = hash;
while (1) {
i = string_map[h & ${strmap.hash_mask}];
i = ${prefix}_string_map[h & ${strmap.hash_mask}];
if (i == none)
return -1;
e = &string_map_entries[i];
if (e->hash == hash && strcmp(str, strings + e->name) == 0)
e = &${prefix}_string_map_entries[i];
if (e->hash == hash && strcmp(str, ${prefix}_strings + e->name) == 0)
return e->num;
h += prime_step;
}
return -1;
}
</%def>
${strmap(instance_strmap, 'instance')}
${strmap(device_strmap, 'device')}
/* Weak aliases for all potential implementations. These will resolve to
* NULL if they're not defined, which lets the resolve_entrypoint() function
* pick the correct entry point.
*/
% for e in instance_entrypoints:
% if e.alias:
<% continue %>
% endif
% if e.guard is not None:
#ifdef ${e.guard}
% endif
${e.return_type} ${e.prefixed_name('anv')}(${e.decl_params()}) __attribute__ ((weak));
% if e.guard is not None:
#endif // ${e.guard}
% endif
% endfor
const struct anv_instance_dispatch_table anv_instance_dispatch_table = {
% for e in instance_entrypoints:
% if e.guard is not None:
#ifdef ${e.guard}
% endif
.${e.name} = ${e.prefixed_name('anv')},
% if e.guard is not None:
#endif // ${e.guard}
% endif
% endfor
};
% for layer in LAYERS:
% for e in entrypoints:
% for e in device_entrypoints:
% if e.alias:
<% continue %>
% endif
@@ -203,8 +266,8 @@ string_map_lookup(const char *str)
% endif
% endfor
const struct anv_dispatch_table ${layer}_dispatch_table = {
% for e in entrypoints:
const struct anv_device_dispatch_table ${layer}_device_dispatch_table = {
% for e in device_entrypoints:
% if e.guard is not None:
#ifdef ${e.guard}
% endif
@@ -219,8 +282,8 @@ string_map_lookup(const char *str)
/** Trampoline entrypoints for all device functions */
% for e in entrypoints:
% if e.alias or not e.is_device_entrypoint():
% for e in device_entrypoints:
% if e.alias:
<% continue %>
% endif
% if e.guard is not None:
@@ -247,11 +310,8 @@ string_map_lookup(const char *str)
% endif
% endfor
const struct anv_dispatch_table anv_tramp_dispatch_table = {
% for e in entrypoints:
% if not e.is_device_entrypoint():
<% continue %>
% endif
const struct anv_device_dispatch_table anv_tramp_device_dispatch_table = {
% for e in device_entrypoints:
% if e.guard is not None:
#ifdef ${e.guard}
% endif
@@ -269,24 +329,54 @@ const struct anv_dispatch_table anv_tramp_dispatch_table = {
* If device is NULL, all device extensions are considered enabled.
*/
bool
anv_entrypoint_is_enabled(int index, uint32_t core_version,
anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
const struct anv_instance_extension_table *instance)
{
switch (index) {
% for e in instance_entrypoints:
case ${e.num}:
/* ${e.name} */
% if e.core_version:
return ${e.core_version.c_vk_version()} <= core_version;
% elif e.extensions:
% for ext in e.extensions:
% if ext.type == 'instance':
if (instance->${ext.name[3:]}) return true;
% else:
/* All device extensions are considered enabled at the instance level */
return true;
% endif
% endfor
return false;
% else:
return true;
% endif
% endfor
default:
return false;
}
}
/** Return true if the core version or extension in which the given entrypoint
* is defined is enabled.
*
* If device is NULL, all device extensions are considered enabled.
*/
bool
anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
const struct anv_instance_extension_table *instance,
const struct anv_device_extension_table *device)
{
switch (index) {
% for e in entrypoints:
% for e in device_entrypoints:
case ${e.num}:
/* ${e.name} */
% if e.core_version:
% if e.is_device_entrypoint():
return ${e.core_version.c_vk_version()} <= core_version;
% else:
return !device && ${e.core_version.c_vk_version()} <= core_version;
% endif
% elif e.extensions:
% for ext in e.extensions:
% if ext.type == 'instance':
if (!device && instance->${ext.name[3:]}) return true;
<% assert False %>
% else:
if (!device || device->${ext.name[3:]}) return true;
% endif
@@ -301,32 +391,40 @@ anv_entrypoint_is_enabled(int index, uint32_t core_version,
}
}
static void * __attribute__ ((noinline))
anv_resolve_entrypoint(const struct gen_device_info *devinfo, uint32_t index)
int
anv_get_instance_entrypoint_index(const char *name)
{
if (devinfo == NULL) {
return anv_dispatch_table.entrypoints[index];
return instance_string_map_lookup(name);
}
const struct anv_dispatch_table *genX_table;
/* Look up a device entrypoint by name.
 *
 * Thin wrapper over device_string_map_lookup(); the value returned is
 * whatever that lookup yields for `name`.  Callers in this change treat a
 * negative result as "not a device entrypoint" and use a non-negative
 * result as an index into a device dispatch table's entrypoints[] array.
 */
int
anv_get_device_entrypoint_index(const char *name)
{
return device_string_map_lookup(name);
}
static void * __attribute__ ((noinline))
anv_resolve_device_entrypoint(const struct gen_device_info *devinfo, uint32_t index)
{
const struct anv_device_dispatch_table *genX_table;
switch (devinfo->gen) {
case 11:
genX_table = &gen11_dispatch_table;
genX_table = &gen11_device_dispatch_table;
break;
case 10:
genX_table = &gen10_dispatch_table;
genX_table = &gen10_device_dispatch_table;
break;
case 9:
genX_table = &gen9_dispatch_table;
genX_table = &gen9_device_dispatch_table;
break;
case 8:
genX_table = &gen8_dispatch_table;
genX_table = &gen8_device_dispatch_table;
break;
case 7:
if (devinfo->is_haswell)
genX_table = &gen75_dispatch_table;
genX_table = &gen75_device_dispatch_table;
else
genX_table = &gen7_dispatch_table;
genX_table = &gen7_device_dispatch_table;
break;
default:
unreachable("unsupported gen\\n");
@@ -335,22 +433,21 @@ anv_resolve_entrypoint(const struct gen_device_info *devinfo, uint32_t index)
if (genX_table->entrypoints[index])
return genX_table->entrypoints[index];
else
return anv_dispatch_table.entrypoints[index];
}
int
anv_get_entrypoint_index(const char *name)
{
return string_map_lookup(name);
return anv_device_dispatch_table.entrypoints[index];
}
void *
anv_lookup_entrypoint(const struct gen_device_info *devinfo, const char *name)
{
int idx = anv_get_entrypoint_index(name);
if (idx < 0)
int idx = anv_get_instance_entrypoint_index(name);
if (idx >= 0)
return anv_instance_dispatch_table.entrypoints[idx];
idx = anv_get_device_entrypoint_index(name);
if (idx >= 0)
return anv_resolve_device_entrypoint(devinfo, idx);
return NULL;
return anv_resolve_entrypoint(devinfo, idx);
}""", output_encoding='utf-8')
U32_MASK = 2**32 - 1
@@ -554,23 +651,40 @@ def main():
EntrypointParam('VkImage', 'pImage', 'VkImage* pImage')
]))
strmap = StringIntMap()
for num, e in enumerate(entrypoints):
strmap.add_string(e.name, num)
device_entrypoints = []
instance_entrypoints = []
for e in entrypoints:
if e.is_device_entrypoint():
device_entrypoints.append(e)
else:
instance_entrypoints.append(e)
device_strmap = StringIntMap()
for num, e in enumerate(device_entrypoints):
device_strmap.add_string(e.name, num)
e.num = num
strmap.bake()
device_strmap.bake()
instance_strmap = StringIntMap()
for num, e in enumerate(instance_entrypoints):
instance_strmap.add_string(e.name, num)
e.num = num
instance_strmap.bake()
# For outputting entrypoints.h we generate an anv_EntryPoint() prototype
# per entry point.
try:
with open(os.path.join(args.outdir, 'anv_entrypoints.h'), 'wb') as f:
f.write(TEMPLATE_H.render(entrypoints=entrypoints,
f.write(TEMPLATE_H.render(instance_entrypoints=instance_entrypoints,
device_entrypoints=device_entrypoints,
LAYERS=LAYERS,
filename=os.path.basename(__file__)))
with open(os.path.join(args.outdir, 'anv_entrypoints.c'), 'wb') as f:
f.write(TEMPLATE_C.render(entrypoints=entrypoints,
f.write(TEMPLATE_C.render(instance_entrypoints=instance_entrypoints,
device_entrypoints=device_entrypoints,
LAYERS=LAYERS,
strmap=strmap,
instance_strmap=instance_strmap,
device_strmap=device_strmap,
filename=os.path.basename(__file__)))
except Exception:
# In the event there's an error this imports some helpers from mako

View File

@@ -903,7 +903,8 @@ struct anv_instance {
struct anv_app_info app_info;
struct anv_instance_extension_table enabled_extensions;
struct anv_dispatch_table dispatch;
struct anv_instance_dispatch_table dispatch;
struct anv_device_dispatch_table device_dispatch;
int physicalDeviceCount;
struct anv_physical_device physicalDevice;
@@ -986,7 +987,7 @@ struct anv_device {
bool can_chain_batches;
bool robust_buffer_access;
struct anv_device_extension_table enabled_extensions;
struct anv_dispatch_table dispatch;
struct anv_device_dispatch_table dispatch;
pthread_mutex_t vma_mutex;
struct util_vma_heap vma_lo;
@@ -3242,10 +3243,15 @@ struct anv_query_pool {
struct anv_bo bo;
};
int anv_get_entrypoint_index(const char *name);
int anv_get_instance_entrypoint_index(const char *name);
int anv_get_device_entrypoint_index(const char *name);
bool
anv_entrypoint_is_enabled(int index, uint32_t core_version,
anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
const struct anv_instance_extension_table *instance);
bool
anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
const struct anv_instance_extension_table *instance,
const struct anv_device_extension_table *device);