ralloc: don't memset ralloc_header, clear it manually
time GALLIUM_NOOP=1 ./run shaders/private/alien_isolation/ >/dev/null

Before (2 takes):
real    0m8.734s    0m8.773s
user    0m34.232s   0m34.348s
sys     0m0.084s    0m0.056s

After (2 takes):
real    0m8.448s    0m8.463s
user    0m33.104s   0m33.160s
sys     0m0.088s    0m0.076s

Average change in "real" time spent: -3.4%

calloc should only do 2 things compared to malloc:
- check for overflow of "n * size"
- call memset
I'm not sure if that explains the difference.

v2: clear "parent" and "next" in the caller of add_child.

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net> (v1)
Tested-by: Edmondo Tommasina <edmondo.tommasina@gmail.com> (v1)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> (v1)
This commit is contained in:
@@ -122,13 +122,27 @@ ralloc_size(const void *ctx, size_t size)
|
||||
void *
|
||||
rzalloc_size(const void *ctx, size_t size)
|
||||
{
|
||||
void *block = calloc(1, size + sizeof(ralloc_header));
|
||||
void *block = malloc(size + sizeof(ralloc_header));
|
||||
ralloc_header *info;
|
||||
ralloc_header *parent;
|
||||
|
||||
if (unlikely(block == NULL))
|
||||
return NULL;
|
||||
|
||||
info = (ralloc_header *) block;
|
||||
/* measurements have shown that calloc is slower (because of
|
||||
* the multiplication overflow checking?), so clear things
|
||||
* manually
|
||||
*/
|
||||
info->parent = NULL;
|
||||
info->child = NULL;
|
||||
info->prev = NULL;
|
||||
info->next = NULL;
|
||||
info->destructor = NULL;
|
||||
|
||||
/* memset the allocation except for ralloc_header */
|
||||
memset(&info[1], 0, size);
|
||||
|
||||
parent = ctx != NULL ? get_header(ctx) : NULL;
|
||||
|
||||
add_child(parent, info);
|
||||
|
Reference in New Issue
Block a user