radeonsi: add testmemperf mem bandwidth test

This commit adds a simple test that measures CPU copy bandwidth to and
from the different memory domains.
It uses the winsys functions rather than the driver ones, so that the
domains and flags can be controlled directly.

Acked-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29073>
This commit is contained in:
Pierre-Eric Pelloux-Prayer
2024-02-16 12:16:37 +01:00
committed by Marge Bot
parent cd9f6f9e85
commit 0f25cef8aa
3 changed files with 117 additions and 4 deletions

View File

@@ -126,6 +126,7 @@ static const struct debug_named_value test_options[] = {
{"testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit."},
{"testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit."},
{"testdmaperf", DBG(TEST_DMA_PERF), "Test DMA performance"},
{"testmemperf", DBG(TEST_MEM_PERF), "Test map + memcpy perf using the winsys."},
DEBUG_NAMED_VALUE_END /* must be last */
};
@@ -1502,6 +1503,9 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
si_test_dma_perf(sscreen);
}
if (test_flags & DBG(TEST_MEM_PERF))
si_test_mem_perf(sscreen);
if (test_flags & (DBG(TEST_VMFAULT_CP) | DBG(TEST_VMFAULT_SHADER)))
si_test_vmfault(sscreen, test_flags);

View File

@@ -253,6 +253,7 @@ enum
DBG_TEST_VMFAULT_CP,
DBG_TEST_VMFAULT_SHADER,
DBG_TEST_DMA_PERF,
DBG_TEST_MEM_PERF,
};
#define DBG_ALL_SHADERS (((1 << (DBG_CS + 1)) - 1))
@@ -1729,9 +1730,11 @@ void si_gfx11_destroy_query(struct si_context *sctx);
void si_test_image_copy_region(struct si_screen *sscreen);
void si_test_blit(struct si_screen *sscreen, unsigned test_flags);
/* si_test_clearbuffer.c */
/* si_test_dma_perf.c */
void si_test_dma_perf(struct si_screen *sscreen);
void si_test_mem_perf(struct si_screen *sscreen);
/* si_uvd.c */
struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templ);

View File

@@ -1,13 +1,12 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
* Copyright 2024 Advanced Micro Devices, Inc.
*
* SPDX-License-Identifier: MIT
*/
/* This file implements tests on the si_clearbuffer function. */
#include "si_pipe.h"
#include "si_query.h"
#include "util/streaming-load-memcpy.h"
#define MIN_SIZE 512
#define MAX_SIZE (128 * 1024 * 1024)
@@ -430,3 +429,110 @@ void si_test_dma_perf(struct si_screen *sscreen)
ctx->destroy(ctx);
exit(0);
}
/**
 * Measure CPU copy bandwidth to/from buffers of each memory domain, then exit.
 *
 * One table is printed to stdout per access pattern (memcpy into the buffer,
 * memcpy out of the buffer, streaming-load memcpy out of the buffer). Each
 * table has one row per domain/flag combination and one column per timed run.
 * Plain malloc'ed memory is reported as "RAM"; VRAM and GTT buffers are
 * created and mapped through the winsys so that the domain and flags are
 * passed explicitly.
 *
 * Rows whose buffer cannot be allocated or mapped are silently skipped.
 *
 * \param sscreen  screen whose winsys is used to create and map the buffers
 *
 * This function does not return: it calls exit(0) after the last table.
 */
void
si_test_mem_perf(struct si_screen *sscreen)
{
   struct radeon_winsys *ws = sscreen->ws;
   const size_t buffer_size = 16 * 1024 * 1024;
   /* Domain 0 is not a winsys domain: it selects the malloc() path below. */
   const enum radeon_bo_domain domains[] = { 0, RADEON_DOMAIN_VRAM, RADEON_DOMAIN_GTT };
   const uint64_t flags[] = { 0, RADEON_FLAG_GTT_WC };
   const int n_loops = 2;
   const char *const title[] = { "Write To", "Read From", "Stream From" };
   const char *const domain_str[] = { "RAM", "VRAM", "GTT" };

   for (unsigned i = 0; i < ARRAY_SIZE(title); i++) {
      /* Table header. */
      printf("| %12s", title[i]);
      printf(" | Size (kB) | Flags |");
      for (int l = 0; l < n_loops; l++)
         printf(" Run %d (MB/s) |", l + 1);
      printf("\n");

      printf("|--------------|-----------|-------|");
      for (int l = 0; l < n_loops; l++)
         printf("--------------|");
      printf("\n");

      for (unsigned j = 0; j < ARRAY_SIZE(domains); j++) {
         enum radeon_bo_domain domain = domains[j];

         for (unsigned k = 0; k < ARRAY_SIZE(flags); k++) {
            /* The extra flag is only exercised on GTT buffers. */
            if (k && domain != RADEON_DOMAIN_GTT)
               continue;

            /* CPU-side source/destination buffer, filled with a known
             * pattern before the timed runs start.
             */
            int *cpu = malloc(buffer_size);
            if (!cpu)
               continue;
            memset(cpu, 'c', buffer_size);

            struct pb_buffer_lean *bo = NULL;
            void *ptr = NULL;

            if (domain) {
               bo = ws->buffer_create(ws, buffer_size, 4096, domain,
                                      RADEON_FLAG_NO_INTERPROCESS_SHARING |
                                      RADEON_FLAG_NO_SUBALLOC |
                                      flags[k]);
               if (!bo) {
                  free(cpu);
                  continue;
               }

               /* Table 0 writes to the buffer, the other tables read from it. */
               ptr = ws->buffer_map(ws, bo, NULL,
                                    RADEON_MAP_TEMPORARY |
                                    (i ? PIPE_MAP_READ : PIPE_MAP_WRITE));
               if (!ptr) {
                  radeon_bo_reference(ws, &bo, NULL);
                  free(cpu);
                  continue;
               }
            } else {
               ptr = malloc(buffer_size);
               if (!ptr) {
                  free(cpu);
                  continue;
               }
            }

            printf("| %12s |", domain_str[j]);
            printf("%10zu |", buffer_size / 1024);
            printf(" %5s |", domain == RADEON_DOMAIN_VRAM ? "(WC)" : (k == 0 ? "" : "WC "));
            fflush(stdout);

            for (int loop = 0; loop < n_loops; loop++) {
               int64_t before = os_time_get_nano();

               switch (i) {
               case 0:
                  memcpy(ptr, cpu, buffer_size);
                  break;
               case 1:
                  memcpy(cpu, ptr, buffer_size);
                  break;
               case 2:
               default:
                  util_streaming_load_memcpy(cpu, ptr, buffer_size);
                  break;
               }

               int64_t after = os_time_get_nano();

               /* Pretend to do something with the result to make sure the
                * copy is not skipped. The comparison is deliberately not
                * wrapped in assert(): that would compile it out with NDEBUG
                * and defeat the purpose.
                */
               if (debug_get_num_option("AMD_DEBUG", 0) == 0x123) {
                  if (memcmp(ptr, cpu, buffer_size) == 0)
                     abort();
               }

               double dt = (double)(after - before) / 1000000000.0;
               double bandwidth = ((double)buffer_size / (1024.0 * 1024.0)) / dt;

               printf("%13.3f |", bandwidth);
            }
            printf("\n");

            free(cpu);
            if (bo) {
               ws->buffer_unmap(ws, bo);
               radeon_bo_reference(ws, &bo, NULL);
            } else {
               free(ptr);
            }
         }
      }
      printf("\n");
   }

   exit(0);
}