[intel] Add a driconf option to cache freed buffer objects for reuse.

This defaults to off, as it has potentially large memory costs for a modest
performance gain.  Ideally we will improve DRM performance to the point where
this optimization is not worth the memory cost in any case, or find some
middle ground in caching only limited numbers of certain buffers.  For now,
this provides a modest 4% improvement in openarena on GM965 and 10% in openarena
on GM945.
This commit is contained in:
Eric Anholt
2008-03-05 14:14:54 -08:00
parent fc21e9cdd0
commit fe91c05b54
5 changed files with 209 additions and 15 deletions

View File

@@ -72,6 +72,28 @@ struct intel_validate_entry {
struct drm_i915_op_arg bo_arg; struct drm_i915_op_arg bo_arg;
}; };
/** Singly-linked list node holding one freed drmBO cached for reuse. */
struct dri_ttm_bo_bucket_entry {
drmBO drm_bo;
struct dri_ttm_bo_bucket_entry *next;
};
/** FIFO list of cached buffer objects of one power-of-two size class. */
struct dri_ttm_bo_bucket {
/* Head of the list; oldest entry is reused first. */
struct dri_ttm_bo_bucket_entry *head;
/* Points at the terminating next pointer, for O(1) tail insertion. */
struct dri_ttm_bo_bucket_entry **tail;
/**
 * Limit on the number of entries in this bucket.
 *
 * 0 means that this caching at this bucket size is disabled.
 * -1 means that there is no limit to caching at this size.
 */
int max_entries;
/* Current number of entries on the list. */
int num_entries;
};
/* Number of size classes in the reuse cache. Arbitrarily chosen; 16 means
 * that the maximum size we'll cache for reuse is 1 << 16 pages, or 256MB
 * (with 4096-byte pages).
 */
#define INTEL_TTM_BO_BUCKETS 16
typedef struct _dri_bufmgr_ttm { typedef struct _dri_bufmgr_ttm {
dri_bufmgr bufmgr; dri_bufmgr bufmgr;
@@ -84,6 +106,9 @@ typedef struct _dri_bufmgr_ttm {
struct intel_validate_entry *validate_array; struct intel_validate_entry *validate_array;
int validate_array_size; int validate_array_size;
int validate_count; int validate_count;
/** Array of lists of cached drmBOs of power-of-two sizes */
struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS];
} dri_bufmgr_ttm; } dri_bufmgr_ttm;
/** /**
@@ -137,6 +162,41 @@ typedef struct _dri_fence_ttm
drmFence drm_fence; drmFence drm_fence;
} dri_fence_ttm; } dri_fence_ttm;
/**
 * Returns the smallest exponent e such that (1 << e) >= n.
 *
 * Returns 0 for n <= 1.  Used to round allocation sizes up to a
 * power of two for bucketing.
 */
static int
logbase2(int n)
{
   /* Plain int locals: no need for GL typedefs here, and "log2" would
    * shadow the C99 math function of the same name.
    */
   int exp = 0;

   while ((1 << exp) < n)
      exp++;

   return exp;
}
/**
 * Returns the reuse-cache bucket for the given buffer size, or NULL if
 * the size is not cacheable (zero, not a power of two, or too large).
 */
static struct dri_ttm_bo_bucket *
dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size)
{
   int i;

   /* We only do buckets in power of two increments.  Reject size == 0 as
    * well: it would pass the (size & (size - 1)) test and the alignment
    * assert, but ffs(0) == 0 below would index cache_bucket[-1].
    */
   if (size == 0 || (size & (size - 1)) != 0)
      return NULL;

   /* We should only see sizes rounded to pages. */
   assert((size % 4096) == 0);

   /* We always allocate in units of pages: bucket i holds (1 << i) pages. */
   i = ffs(size / 4096) - 1;
   if (i >= INTEL_TTM_BO_BUCKETS)
      return NULL;

   return &bufmgr_ttm->cache_bucket[i];
}
static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm) static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm)
{ {
int i, j; int i, j;
@@ -338,6 +398,9 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
int ret; int ret;
uint64_t flags; uint64_t flags;
unsigned int hint; unsigned int hint;
unsigned long alloc_size;
struct dri_ttm_bo_bucket *bucket;
GLboolean alloc_from_cache = GL_FALSE;
ttm_buf = calloc(1, sizeof(*ttm_buf)); ttm_buf = calloc(1, sizeof(*ttm_buf));
if (!ttm_buf) if (!ttm_buf)
@@ -352,13 +415,48 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
/* No hints we want to use. */ /* No hints we want to use. */
hint = 0; hint = 0;
ret = drmBOCreate(bufmgr_ttm->fd, size, alignment / pageSize, /* Round the allocated size up to a power of two number of pages. */
NULL, flags, hint, &ttm_buf->drm_bo); alloc_size = 1 << logbase2(size);
if (ret != 0) { if (alloc_size < pageSize)
free(ttm_buf); alloc_size = pageSize;
return NULL; bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size);
/* If we don't have caching at this size, don't actually round the
* allocation up.
*/
if (bucket == NULL || bucket->max_entries == 0)
alloc_size = size;
/* Get a buffer out of the cache if available */
if (bucket != NULL && bucket->num_entries > 0) {
struct dri_ttm_bo_bucket_entry *entry = bucket->head;
int busy;
/* Check if the buffer is still in flight. If not, reuse it. */
ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy);
alloc_from_cache = (ret == 0 && busy == 0);
if (alloc_from_cache) {
bucket->head = entry->next;
if (entry->next == NULL)
bucket->tail = &bucket->head;
bucket->num_entries--;
ttm_buf->drm_bo = entry->drm_bo;
free(entry);
}
} }
ttm_buf->bo.size = ttm_buf->drm_bo.size;
if (!alloc_from_cache) {
ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize,
NULL, flags, hint, &ttm_buf->drm_bo);
if (ret != 0) {
free(ttm_buf);
return NULL;
}
}
ttm_buf->bo.size = size;
ttm_buf->bo.offset = ttm_buf->drm_bo.offset; ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
ttm_buf->bo.virtual = NULL; ttm_buf->bo.virtual = NULL;
ttm_buf->bo.bufmgr = bufmgr; ttm_buf->bo.bufmgr = bufmgr;
@@ -450,6 +548,7 @@ dri_ttm_bo_unreference(dri_bo *buf)
return; return;
if (--ttm_buf->refcount == 0) { if (--ttm_buf->refcount == 0) {
struct dri_ttm_bo_bucket *bucket;
int ret; int ret;
assert(ttm_buf->map_count == 0); assert(ttm_buf->map_count == 0);
@@ -476,11 +575,32 @@ dri_ttm_bo_unreference(dri_bo *buf)
} }
} }
ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo); bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size);
if (ret != 0) { /* Put the buffer into our internal cache for reuse if we can. */
fprintf(stderr, "drmBOUnreference failed (%s): %s\n", if (!ttm_buf->shared &&
ttm_buf->name, strerror(-ret)); bucket != NULL &&
(bucket->max_entries == -1 ||
(bucket->max_entries > 0 &&
bucket->num_entries < bucket->max_entries)))
{
struct dri_ttm_bo_bucket_entry *entry;
entry = calloc(1, sizeof(*entry));
entry->drm_bo = ttm_buf->drm_bo;
entry->next = NULL;
*bucket->tail = entry;
bucket->tail = &entry->next;
bucket->num_entries++;
} else {
/* Decrement the kernel refcount for the buffer. */
ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
if (ret != 0) {
fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
ttm_buf->name, strerror(-ret));
}
} }
DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
free(buf); free(buf);
@@ -657,9 +777,34 @@ static void
dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr) dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr)
{ {
dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
int i;
free(bufmgr_ttm->validate_array); free(bufmgr_ttm->validate_array);
/* Free any cached buffer objects we were going to reuse */
for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i];
struct dri_ttm_bo_bucket_entry *entry;
while ((entry = bucket->head) != NULL) {
int ret;
bucket->head = entry->next;
if (entry->next == NULL)
bucket->tail = &bucket->head;
bucket->num_entries--;
/* Decrement the kernel refcount for the buffer. */
ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo);
if (ret != 0) {
fprintf(stderr, "drmBOUnreference failed: %s\n",
strerror(-ret));
}
free(entry);
}
}
free(bufmgr); free(bufmgr);
} }
@@ -876,6 +1021,24 @@ dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
bufmgr_ttm->validate_count = 0; bufmgr_ttm->validate_count = 0;
} }
/**
 * Enables unlimited caching of freed buffer objects for later reuse.
 *
 * This is potentially very memory expensive: each bucket can grow as
 * large as the peak number of in-flight buffers of that size class.
 */
void
intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
{
   dri_bufmgr_ttm *ttm = (dri_bufmgr_ttm *)bufmgr;
   int bucket;

   /* max_entries == -1 means "no limit" for a bucket. */
   for (bucket = 0; bucket < INTEL_TTM_BO_BUCKETS; bucket++)
      ttm->cache_bucket[bucket].max_entries = -1;
}
/** /**
* Initializes the TTM buffer manager, which uses the kernel to allocate, map, * Initializes the TTM buffer manager, which uses the kernel to allocate, map,
* and manage map buffer objects. * and manage map buffer objects.
@@ -890,6 +1053,7 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
unsigned int fence_type_flush, int batch_size) unsigned int fence_type_flush, int batch_size)
{ {
dri_bufmgr_ttm *bufmgr_ttm; dri_bufmgr_ttm *bufmgr_ttm;
int i;
bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm)); bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm));
bufmgr_ttm->fd = fd; bufmgr_ttm->fd = fd;
@@ -919,6 +1083,10 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit; bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit;
bufmgr_ttm->bufmgr.debug = GL_FALSE; bufmgr_ttm->bufmgr.debug = GL_FALSE;
/* Initialize the linked lists for BO reuse cache. */
for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++)
bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head;
return &bufmgr_ttm->bufmgr; return &bufmgr_ttm->bufmgr;
} }

View File

@@ -14,4 +14,7 @@ dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type, dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
unsigned int fence_type_flush, int batch_size); unsigned int fence_type_flush, int batch_size);
void
intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr);
#endif #endif

View File

@@ -456,6 +456,7 @@ intel_init_bufmgr(struct intel_context *intel)
ttm_supported = GL_FALSE; ttm_supported = GL_FALSE;
if (!ttm_disable && ttm_supported) { if (!ttm_disable && ttm_supported) {
int bo_reuse_mode;
intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd, intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd,
DRM_FENCE_TYPE_EXE, DRM_FENCE_TYPE_EXE,
DRM_FENCE_TYPE_EXE | DRM_FENCE_TYPE_EXE |
@@ -463,6 +464,15 @@ intel_init_bufmgr(struct intel_context *intel)
BATCH_SZ); BATCH_SZ);
if (intel->bufmgr != NULL) if (intel->bufmgr != NULL)
intel->ttm = GL_TRUE; intel->ttm = GL_TRUE;
bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse");
switch (bo_reuse_mode) {
case DRI_CONF_BO_REUSE_DISABLED:
break;
case DRI_CONF_BO_REUSE_ALL:
intel_ttm_enable_bo_reuse(intel->bufmgr);
break;
}
} }
/* Otherwise, use the classic buffer manager. */ /* Otherwise, use the classic buffer manager. */
if (intel->bufmgr == NULL) { if (intel->bufmgr == NULL) {
@@ -548,6 +558,9 @@ intelInitContext(struct intel_context *intel,
intel->width = intelScreen->width; intel->width = intelScreen->width;
intel->height = intelScreen->height; intel->height = intelScreen->height;
driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
intel->driScreen->myNum,
IS_965(intelScreen->deviceID) ? "i965" : "i915");
if (intelScreen->deviceID == PCI_CHIP_I865_G) if (intelScreen->deviceID == PCI_CHIP_I865_G)
intel->maxBatchSize = 4096; intel->maxBatchSize = 4096;
else else
@@ -556,10 +569,6 @@ intelInitContext(struct intel_context *intel,
if (!intel_init_bufmgr(intel)) if (!intel_init_bufmgr(intel))
return GL_FALSE; return GL_FALSE;
driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
intel->driScreen->myNum,
IS_965(intelScreen->deviceID) ? "i965" : "i915");
ctx->Const.MaxTextureMaxAnisotropy = 2.0; ctx->Const.MaxTextureMaxAnisotropy = 2.0;
/* This doesn't yet catch all non-conformant rendering, but it's a /* This doesn't yet catch all non-conformant rendering, but it's a

View File

@@ -476,6 +476,11 @@ extern void intelInitStateFuncs(struct dd_function_table *functions);
#define BLENDFACT_INV_CONST_ALPHA 0x0f #define BLENDFACT_INV_CONST_ALPHA 0x0f
#define BLENDFACT_MASK 0x0f #define BLENDFACT_MASK 0x0f
/* Values of the "bo_reuse" driconf option, as returned by driQueryOptioni.
 * Must stay in sync with the DRI_CONF_ENUM values declared in the driver's
 * __driConfigOptions string.
 */
enum {
DRI_CONF_BO_REUSE_DISABLED,
DRI_CONF_BO_REUSE_ALL
};
extern int intel_translate_shadow_compare_func(GLenum func); extern int intel_translate_shadow_compare_func(GLenum func);
extern int intel_translate_compare_func(GLenum func); extern int intel_translate_compare_func(GLenum func);
extern int intel_translate_stencil_op(GLenum op); extern int intel_translate_stencil_op(GLenum op);

View File

@@ -56,6 +56,15 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_SECTION_PERFORMANCE DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
/* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
* DRI_CONF_BO_REUSE_ALL
*/
DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1")
DRI_CONF_DESC_BEGIN(en, "Buffer object reuse")
DRI_CONF_ENUM(0, "Disable buffer object reuse")
DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
DRI_CONF_DESC_END
DRI_CONF_OPT_END
DRI_CONF_SECTION_END DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY DRI_CONF_SECTION_QUALITY
DRI_CONF_FORCE_S3TC_ENABLE(false) DRI_CONF_FORCE_S3TC_ENABLE(false)
@@ -66,7 +75,7 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_SECTION_END DRI_CONF_SECTION_END
DRI_CONF_END; DRI_CONF_END;
const GLuint __driNConfigOptions = 5; const GLuint __driNConfigOptions = 6;
#ifdef USE_NEW_INTERFACE #ifdef USE_NEW_INTERFACE
static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;