radv: introduce perf test env var and allow to enable chaining

We have some features that seem to slow things down or cause other
possible undesireable side effects, but it would be nice to test
games etc with them easily.

I forsee multisample DCC and maybe some shader opt changes using this.

For now use it for batch chaining.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Dave Airlie
2017-05-09 04:17:30 +01:00
parent d0a26edc25
commit c2464271a0
7 changed files with 20 additions and 4 deletions

View File

@@ -37,4 +37,7 @@ enum {
RADV_DEBUG_NO_IBS = 0x200,
};
enum {
RADV_PERFTEST_BATCHCHAIN = 0x1,
};
#endif

View File

@@ -270,7 +270,8 @@ radv_physical_device_init(struct radv_physical_device *device,
assert(strlen(path) < ARRAY_SIZE(device->path));
strncpy(device->path, path, ARRAY_SIZE(device->path));
device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
instance->perftest_flags);
if (!device->ws) {
result = VK_ERROR_INCOMPATIBLE_DRIVER;
goto fail;
@@ -367,6 +368,11 @@ static const struct debug_control radv_debug_options[] = {
{NULL, 0}
};
static const struct debug_control radv_perftest_options[] = {
{"batchchain", RADV_PERFTEST_BATCHCHAIN},
{NULL, 0}
};
VkResult radv_CreateInstance(
const VkInstanceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
@@ -424,6 +430,9 @@ VkResult radv_CreateInstance(
instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
radv_debug_options);
instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
radv_perftest_options);
*pInstance = radv_instance_to_handle(instance);
return VK_SUCCESS;

View File

@@ -288,6 +288,7 @@ struct radv_instance {
struct radv_physical_device physicalDevices[RADV_MAX_DRM_DEVICES];
uint64_t debug_flags;
uint64_t perftest_flags;
};
VkResult radv_init_wsi(struct radv_physical_device *physical_device);

View File

@@ -931,7 +931,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
if (!cs->ws->use_ib_bos) {
ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
} else {

View File

@@ -82,7 +82,7 @@ static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
}
struct radeon_winsys *
radv_amdgpu_winsys_create(int fd, uint32_t debug_flags)
radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags)
{
uint32_t drm_major, drm_minor, r;
amdgpu_device_handle dev;
@@ -106,6 +106,7 @@ radv_amdgpu_winsys_create(int fd, uint32_t debug_flags)
if (debug_flags & RADV_DEBUG_NO_IBS)
ws->use_ib_bos = false;
ws->batchchain = !!(perftest_flags & RADV_PERFTEST_BATCHCHAIN);
LIST_INITHEAD(&ws->global_bo_list);
pthread_mutex_init(&ws->global_bo_list_lock, NULL);
ws->base.query_info = radv_amdgpu_winsys_query_info;

View File

@@ -43,6 +43,7 @@ struct radv_amdgpu_winsys {
ADDR_HANDLE addrlib;
bool debug_all_bos;
bool batchchain;
pthread_mutex_t global_bo_list_lock;
struct list_head global_bo_list;
unsigned num_buffers;

View File

@@ -29,6 +29,7 @@
#ifndef RADV_AMDGPU_WINSYS_PUBLIC_H
#define RADV_AMDGPU_WINSYS_PUBLIC_H
struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint32_t debug_flags);
struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags,
uint64_t perftest_flags);
#endif /* RADV_AMDGPU_WINSYS_PUBLIC_H */