anv: Advertise larger heap sizes
Instead of just advertising the aperture size, we do something more intelligent. On systems with a full 48-bit PPGTT, we can address 100% of the available system RAM from the GPU. In order to keep clients from burning 100% of your available RAM for graphics resources, we have a nice little heuristic (which has received exactly zero tuning) to keep things under a reasonable level of control. Reviewed-by: Kristian H. Kristensen <krh@bitplanet.net>
This commit is contained in:
@@ -25,6 +25,7 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
|
#include <sys/sysinfo.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <xf86drm.h>
|
#include <xf86drm.h>
|
||||||
@@ -53,6 +54,48 @@ compiler_perf_log(void *data, const char *fmt, ...)
|
|||||||
va_end(args);
|
va_end(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
anv_compute_heap_size(int fd, uint64_t *heap_size)
|
||||||
|
{
|
||||||
|
uint64_t gtt_size;
|
||||||
|
if (anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
|
||||||
|
&gtt_size) == -1) {
|
||||||
|
/* If, for whatever reason, we can't actually get the GTT size from the
|
||||||
|
* kernel (too old?) fall back to the aperture size.
|
||||||
|
*/
|
||||||
|
anv_perf_warn("Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m");
|
||||||
|
|
||||||
|
if (anv_gem_get_aperture(fd, &gtt_size) == -1) {
|
||||||
|
return vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
|
||||||
|
"failed to get aperture size: %m");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Query the total ram from the system */
|
||||||
|
struct sysinfo info;
|
||||||
|
sysinfo(&info);
|
||||||
|
|
||||||
|
uint64_t total_ram = (uint64_t)info.totalram * (uint64_t)info.mem_unit;
|
||||||
|
|
||||||
|
/* We don't want to burn too much ram with the GPU. If the user has 4GiB
|
||||||
|
* or less, we use at most half. If they have more than 4GiB, we use 3/4.
|
||||||
|
*/
|
||||||
|
uint64_t available_ram;
|
||||||
|
if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
|
||||||
|
available_ram = total_ram / 2;
|
||||||
|
else
|
||||||
|
available_ram = total_ram * 3 / 4;
|
||||||
|
|
||||||
|
/* We also want to leave some padding for things we allocate in the driver,
|
||||||
|
* so don't go over 3/4 of the GTT either.
|
||||||
|
*/
|
||||||
|
uint64_t available_gtt = gtt_size * 3 / 4;
|
||||||
|
|
||||||
|
*heap_size = MIN2(available_ram, available_gtt);
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
anv_device_get_cache_uuid(void *uuid)
|
anv_device_get_cache_uuid(void *uuid)
|
||||||
{
|
{
|
||||||
@@ -124,12 +167,6 @@ anv_physical_device_init(struct anv_physical_device *device,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) {
|
|
||||||
result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
|
|
||||||
"failed to get aperture size: %m");
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
|
if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
|
||||||
result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
|
result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
|
||||||
"kernel missing gem wait");
|
"kernel missing gem wait");
|
||||||
@@ -151,6 +188,10 @@ anv_physical_device_init(struct anv_physical_device *device,
|
|||||||
|
|
||||||
device->supports_48bit_addresses = anv_gem_supports_48b_addresses(fd);
|
device->supports_48bit_addresses = anv_gem_supports_48b_addresses(fd);
|
||||||
|
|
||||||
|
result = anv_compute_heap_size(fd, &device->heap_size);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto fail;
|
||||||
|
|
||||||
if (!anv_device_get_cache_uuid(device->uuid)) {
|
if (!anv_device_get_cache_uuid(device->uuid)) {
|
||||||
result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
|
result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
|
||||||
"cannot generate UUID");
|
"cannot generate UUID");
|
||||||
@@ -735,12 +776,6 @@ void anv_GetPhysicalDeviceMemoryProperties(
|
|||||||
VkPhysicalDeviceMemoryProperties* pMemoryProperties)
|
VkPhysicalDeviceMemoryProperties* pMemoryProperties)
|
||||||
{
|
{
|
||||||
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
|
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
|
||||||
VkDeviceSize heap_size;
|
|
||||||
|
|
||||||
/* Reserve some wiggle room for the driver by exposing only 75% of the
|
|
||||||
* aperture to the heap.
|
|
||||||
*/
|
|
||||||
heap_size = 3 * physical_device->aperture_size / 4;
|
|
||||||
|
|
||||||
if (physical_device->info.has_llc) {
|
if (physical_device->info.has_llc) {
|
||||||
/* Big core GPUs share LLC with the CPU and thus one memory type can be
|
/* Big core GPUs share LLC with the CPU and thus one memory type can be
|
||||||
@@ -777,7 +812,7 @@ void anv_GetPhysicalDeviceMemoryProperties(
|
|||||||
|
|
||||||
pMemoryProperties->memoryHeapCount = 1;
|
pMemoryProperties->memoryHeapCount = 1;
|
||||||
pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
|
pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
|
||||||
.size = heap_size,
|
.size = physical_device->heap_size,
|
||||||
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@@ -287,6 +287,22 @@ anv_gem_destroy_context(struct anv_device *device, int context)
|
|||||||
return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
|
return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
anv_gem_get_context_param(int fd, int context, uint32_t param, uint64_t *value)
|
||||||
|
{
|
||||||
|
struct drm_i915_gem_context_param gp = {
|
||||||
|
.ctx_id = context,
|
||||||
|
.param = param,
|
||||||
|
};
|
||||||
|
|
||||||
|
int ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &gp);
|
||||||
|
if (ret == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
*value = gp.value;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
anv_gem_get_aperture(int fd, uint64_t *size)
|
anv_gem_get_aperture(int fd, uint64_t *size)
|
||||||
{
|
{
|
||||||
|
@@ -516,7 +516,15 @@ struct anv_physical_device {
|
|||||||
char path[20];
|
char path[20];
|
||||||
const char * name;
|
const char * name;
|
||||||
struct gen_device_info info;
|
struct gen_device_info info;
|
||||||
uint64_t aperture_size;
|
/** Amount of "GPU memory" we want to advertise
|
||||||
|
*
|
||||||
|
* Clearly, this value is bogus since Intel is a UMA architecture. On
|
||||||
|
* gen7 platforms, we are limited by GTT size unless we want to implement
|
||||||
|
* fine-grained tracking and GTT splitting. On Broadwell and above we are
|
||||||
|
* practically unlimited. However, we will never report more than 3/4 of
|
||||||
|
* the total system ram to try and avoid running out of RAM.
|
||||||
|
*/
|
||||||
|
uint64_t heap_size;
|
||||||
bool supports_48bit_addresses;
|
bool supports_48bit_addresses;
|
||||||
struct brw_compiler * compiler;
|
struct brw_compiler * compiler;
|
||||||
struct isl_device isl_dev;
|
struct isl_device isl_dev;
|
||||||
@@ -652,6 +660,8 @@ int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
|
|||||||
uint32_t stride, uint32_t tiling);
|
uint32_t stride, uint32_t tiling);
|
||||||
int anv_gem_create_context(struct anv_device *device);
|
int anv_gem_create_context(struct anv_device *device);
|
||||||
int anv_gem_destroy_context(struct anv_device *device, int context);
|
int anv_gem_destroy_context(struct anv_device *device, int context);
|
||||||
|
int anv_gem_get_context_param(int fd, int context, uint32_t param,
|
||||||
|
uint64_t *value);
|
||||||
int anv_gem_get_param(int fd, uint32_t param);
|
int anv_gem_get_param(int fd, uint32_t param);
|
||||||
bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
|
bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
|
||||||
int anv_gem_get_aperture(int fd, uint64_t *size);
|
int anv_gem_get_aperture(int fd, uint64_t *size);
|
||||||
|
Reference in New Issue
Block a user