radeonsi: add testmemperf mem bandwidth test
This commit adds a simple test that measures memory bandwidth to and from each memory domain. It uses the winsys functions rather than the driver ones, so that the domains and flags can be controlled directly. Acked-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29073>
This commit is contained in:

committed by
Marge Bot

parent
cd9f6f9e85
commit
0f25cef8aa
@@ -126,6 +126,7 @@ static const struct debug_named_value test_options[] = {
|
||||
{"testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit."},
|
||||
{"testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit."},
|
||||
{"testdmaperf", DBG(TEST_DMA_PERF), "Test DMA performance"},
|
||||
{"testmemperf", DBG(TEST_MEM_PERF), "Test map + memcpy perf using the winsys."},
|
||||
|
||||
DEBUG_NAMED_VALUE_END /* must be last */
|
||||
};
|
||||
@@ -1502,6 +1503,9 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
||||
si_test_dma_perf(sscreen);
|
||||
}
|
||||
|
||||
if (test_flags & DBG(TEST_MEM_PERF))
|
||||
si_test_mem_perf(sscreen);
|
||||
|
||||
if (test_flags & (DBG(TEST_VMFAULT_CP) | DBG(TEST_VMFAULT_SHADER)))
|
||||
si_test_vmfault(sscreen, test_flags);
|
||||
|
||||
|
@@ -253,6 +253,7 @@ enum
|
||||
DBG_TEST_VMFAULT_CP,
|
||||
DBG_TEST_VMFAULT_SHADER,
|
||||
DBG_TEST_DMA_PERF,
|
||||
DBG_TEST_MEM_PERF,
|
||||
};
|
||||
|
||||
#define DBG_ALL_SHADERS (((1 << (DBG_CS + 1)) - 1))
|
||||
@@ -1729,9 +1730,11 @@ void si_gfx11_destroy_query(struct si_context *sctx);
|
||||
void si_test_image_copy_region(struct si_screen *sscreen);
|
||||
void si_test_blit(struct si_screen *sscreen, unsigned test_flags);
|
||||
|
||||
/* si_test_clearbuffer.c */
|
||||
/* si_test_dma_perf.c */
|
||||
void si_test_dma_perf(struct si_screen *sscreen);
|
||||
|
||||
void si_test_mem_perf(struct si_screen *sscreen);
|
||||
|
||||
/* si_uvd.c */
|
||||
struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
|
||||
const struct pipe_video_codec *templ);
|
||||
|
@@ -1,13 +1,12 @@
|
||||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
* Copyright 2024 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/* This file implements tests on the si_clearbuffer function. */
|
||||
|
||||
#include "si_pipe.h"
|
||||
#include "si_query.h"
|
||||
#include "util/streaming-load-memcpy.h"
|
||||
|
||||
#define MIN_SIZE 512
|
||||
#define MAX_SIZE (128 * 1024 * 1024)
|
||||
@@ -430,3 +429,110 @@ void si_test_dma_perf(struct si_screen *sscreen)
|
||||
ctx->destroy(ctx);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void
|
||||
si_test_mem_perf(struct si_screen *sscreen)
|
||||
{
|
||||
struct radeon_winsys *ws = sscreen->ws;
|
||||
const size_t buffer_size = 16 * 1024 * 1024;
|
||||
const enum radeon_bo_domain domains[] = { 0, RADEON_DOMAIN_VRAM, RADEON_DOMAIN_GTT };
|
||||
const uint64_t flags[] = { 0, RADEON_FLAG_GTT_WC };
|
||||
const int n_loops = 2;
|
||||
char *title[] = { "Write To", "Read From", "Stream From" };
|
||||
char *domain_str[] = { "RAM", "VRAM", "GTT" };
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
printf("| %12s", title[i]);
|
||||
|
||||
printf(" | Size (kB) | Flags |");
|
||||
for (int l = 0; l < n_loops; l++)
|
||||
printf(" Run %d (MB/s) |", l + 1);
|
||||
printf("\n");
|
||||
|
||||
printf("|--------------|-----------|-------|");
|
||||
for (int l = 0; l < n_loops; l++)
|
||||
printf("--------------|");
|
||||
printf("\n");
|
||||
for (int j = 0; j < ARRAY_SIZE(domains); j++) {
|
||||
enum radeon_bo_domain domain = domains[j];
|
||||
for (int k = 0; k < ARRAY_SIZE(flags); k++) {
|
||||
if (k && domain != RADEON_DOMAIN_GTT)
|
||||
continue;
|
||||
|
||||
struct pb_buffer_lean *bo = NULL;
|
||||
void *ptr = NULL;
|
||||
|
||||
if (domains[j]) {
|
||||
bo = ws->buffer_create(ws, buffer_size, 4096, domains[j],
|
||||
RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_NO_SUBALLOC |
|
||||
flags[k]);
|
||||
if (!bo)
|
||||
continue;
|
||||
|
||||
ptr = ws->buffer_map(ws, bo, NULL, RADEON_MAP_TEMPORARY | (i ? PIPE_MAP_READ : PIPE_MAP_WRITE));
|
||||
if (!ptr) {
|
||||
radeon_bo_reference(ws, &bo, NULL);
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
ptr = malloc(buffer_size);
|
||||
}
|
||||
|
||||
printf("| %12s |", domain_str[j]);
|
||||
|
||||
printf("%10zu |", buffer_size / 1024);
|
||||
|
||||
printf(" %5s |", domain == RADEON_DOMAIN_VRAM ? "(WC)" : (k == 0 ? "" : "WC "));
|
||||
|
||||
int *cpu = calloc(1, buffer_size);
|
||||
memset(cpu, 'c', buffer_size);
|
||||
fflush(stdout);
|
||||
|
||||
int64_t before, after;
|
||||
|
||||
for (int loop = 0; loop < n_loops; loop++) {
|
||||
before = os_time_get_nano();
|
||||
|
||||
switch (i) {
|
||||
case 0:
|
||||
memcpy(ptr, cpu, buffer_size);
|
||||
break;
|
||||
case 1:
|
||||
memcpy(cpu, ptr, buffer_size);
|
||||
break;
|
||||
case 2:
|
||||
default:
|
||||
util_streaming_load_memcpy(cpu, ptr, buffer_size);
|
||||
break;
|
||||
}
|
||||
|
||||
after = os_time_get_nano();
|
||||
|
||||
/* Pretend to do something with the result to make sure it's
|
||||
* not skipped.
|
||||
*/
|
||||
if (debug_get_num_option("AMD_DEBUG", 0) == 0x123)
|
||||
assert(memcmp(ptr, cpu, buffer_size));
|
||||
|
||||
float dt = (after - before) / (1000000000.0);
|
||||
float bandwidth = (buffer_size / (1024 * 1024)) / dt;
|
||||
|
||||
printf("%13.3f |", bandwidth);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
free(cpu);
|
||||
if (bo) {
|
||||
ws->buffer_unmap(ws, bo);
|
||||
radeon_bo_reference(ws, &bo, NULL);
|
||||
} else {
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
Reference in New Issue
Block a user