util,gallium: put count in pipe_resource & sampler_view on its own cache line

This adds 60 bytes to both structures. It eliminates false sharing
(https://en.wikipedia.org/wiki/False_sharing) between the atomically
updated reference count and the fields next to it.

Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11618>
This commit is contained in:
Marek Olšák
2021-06-27 17:55:29 -04:00
committed by Marge Bot
parent b34a3459f8
commit 8c6e18bc51
8 changed files with 35 additions and 8 deletions

View File

@@ -48,7 +48,7 @@ util_create_texture2d(struct pipe_screen *screen, unsigned width,
unsigned height, enum pipe_format format,
unsigned num_samples)
{
struct pipe_resource templ = {{0}};
struct pipe_resource templ = {0};
templ.target = PIPE_TEXTURE_2D;
templ.width0 = width;
@@ -698,7 +698,7 @@ test_texture_barrier(struct pipe_context *ctx, bool use_fbfetch,
"ADD OUT[0], TEMP[0], IMM[0]\n"
"END\n";
} else {
struct pipe_sampler_view templ = {{0}};
struct pipe_sampler_view templ = {0};
templ.format = cb->format;
templ.target = cb->target;
templ.swizzle_r = PIPE_SWIZZLE_X;

View File

@@ -469,7 +469,7 @@ create_staging_resource(struct d3d12_context *ctx,
unsigned mask)
{
struct pipe_resource templ = {{0}};
struct pipe_resource templ = {};
struct pipe_resource *staging_res;
struct pipe_box copy_src;

View File

@@ -320,7 +320,7 @@ d3d12_surface_update_pre_draw(struct d3d12_surface *surface,
if (mode == D3D12_SURFACE_CONVERSION_BGRA_UINT) {
if (!surface->rgba_texture) {
struct pipe_resource templ = {{0}};
struct pipe_resource templ = {};
struct pipe_resource *src = surface->base.texture;
templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;

View File

@@ -441,8 +441,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
* dummy texture there. */
if (!r300->screen->caps.is_r500) {
struct pipe_resource *tex;
struct pipe_resource rtempl = {{0}};
struct pipe_sampler_view vtempl = {{0}};
struct pipe_resource rtempl = {0};
struct pipe_sampler_view vtempl = {0};
rtempl.target = PIPE_TEXTURE_2D;
rtempl.format = PIPE_FORMAT_I8_UNORM;

View File

@@ -155,6 +155,23 @@ typedef unsigned char boolean;
#endif
/**
 * EXCLUSIVE_CACHELINE(decl): give one struct member a cache line to itself.
 *
 * Expands to an anonymous union that overlays the member declaration with a
 * char array of CACHE_LINE_SIZE bytes, so the member that follows the union
 * is pushed past that padding.
 *
 * The purpose is to avoid "false sharing"
 * (https://en.wikipedia.org/wiki/False_sharing): atomic updates of
 * pipe_reference::count and accesses to the neighbouring fields no longer
 * hit the same cache line, which makes either or both of them faster.
 *
 * Structures containing this should be allocated with CALLOC_STRUCT_CL or
 * MALLOC_STRUCT_CL and released with FREE_CL.
 *
 * NOTE: PIPE_ALIGN_VAR is deliberately not used here. It would align the
 * whole structure, whereas only this one field is meant to be aligned.
 */
#define EXCLUSIVE_CACHELINE(member_decl) \
   union { \
      char __cl_space[CACHE_LINE_SIZE]; \
      member_decl; \
   }
#if defined(__GNUC__)

View File

@@ -472,7 +472,9 @@ struct pipe_surface
*/
struct pipe_sampler_view
{
struct pipe_reference reference;
/* Put the refcount on its own cache line to prevent "False sharing". */
EXCLUSIVE_CACHELINE(struct pipe_reference reference);
enum pipe_format format:15; /**< typed PIPE_FORMAT_x */
enum pipe_texture_target target:5; /**< PIPE_TEXTURE_x */
unsigned swizzle_r:3; /**< PIPE_SWIZZLE_x for red component */
@@ -543,7 +545,8 @@ struct pipe_box
*/
struct pipe_resource
{
struct pipe_reference reference;
/* Put the refcount on its own cache line to prevent "False sharing". */
EXCLUSIVE_CACHELINE(struct pipe_reference reference);
unsigned width0; /**< Used by both buffers and textures. */
uint16_t height0; /* Textures: The maximum height/depth/array_size is 16k. */

View File

@@ -477,4 +477,7 @@ typedef int lock_cap_t;
#endif
/*
 * Cache line size, in bytes, assumed when padding (EXCLUSIVE_CACHELINE) and
 * aligning (MALLOC_STRUCT_CL / CALLOC_STRUCT_CL) structures.
 *
 * TODO: this could be different on non-x86 architectures.
 */
#define CACHE_LINE_SIZE 64
#endif /* UTIL_MACROS_H */

View File

@@ -90,6 +90,10 @@ mem_dup(const void *src, size_t size)
/* Byte offset of MEMBER within TYPE, obtained by taking the member's address
 * through a null TYPE pointer and converting it to uintptr_t. */
#define Offset(TYPE, MEMBER) ((uintptr_t)&(((TYPE *)NULL)->MEMBER))
/*
 * Allocate a structure aligned to a cache line (used to make atomic ops
 * faster).
 *
 * MALLOC_STRUCT_CL(T) and CALLOC_STRUCT_CL(T) take a struct tag T, request
 * sizeof(struct T) bytes with CACHE_LINE_SIZE alignment from align_malloc()
 * and align_calloc() respectively, and yield the result as `struct T *`.
 * NOTE(review): CALLOC_STRUCT_CL presumably returns zero-filled memory;
 * confirm against align_calloc().
 *
 * FREE_CL(ptr) forwards ptr to align_free().
 *
 * The allocating expansions are wrapped in parentheses on purpose: a cast
 * binds less tightly than postfix operators, so without them
 * `CALLOC_STRUCT_CL(foo)->member` would apply `->` to the untyped allocator
 * result instead of to the `struct foo *`.
 */
#define MALLOC_STRUCT_CL(T) ((struct T *)align_malloc(sizeof(struct T), CACHE_LINE_SIZE))
#define CALLOC_STRUCT_CL(T) ((struct T *)align_calloc(sizeof(struct T), CACHE_LINE_SIZE))
#define FREE_CL(ptr) align_free(ptr)
#ifdef __cplusplus
}