radeonsi: add new R600_DEBUG test "testclearbufperf"

Signed-off-by: Darren Powell <darren.powell@amd.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
Darren Powell
2018-06-13 18:54:24 -04:00
committed by Marek Olšák
parent 977638006b
commit 726a48c94f
8 changed files with 170 additions and 11 deletions

View File

@@ -43,6 +43,7 @@ C_SOURCES := \
si_state_streamout.c \
si_state_viewport.c \
si_state.h \
si_test_clearbuffer.c \
si_test_dma.c \
si_texture.c \
si_uvd.c \

View File

@@ -59,6 +59,7 @@ files_libradeonsi = files(
'si_state_shaders.c',
'si_state_streamout.c',
'si_state_viewport.c',
'si_test_clearbuffer.c',
'si_test_dma.c',
'si_texture.c',
'si_uvd.c',

View File

@@ -256,7 +256,7 @@ void vi_dcc_clear_level(struct si_context *sctx,
}
si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,
clear_value, SI_COHERENCY_CB_META);
clear_value, SI_COHERENCY_CB_META, SI_METHOD_BEST);
}
/* Set the same micro tile mode as the destination of the last MSAA resolve.
@@ -489,7 +489,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
tex->cmask_offset, tex->surface.cmask_size,
0xCCCCCCCC, SI_COHERENCY_CB_META);
0xCCCCCCCC, SI_COHERENCY_CB_META, SI_METHOD_BEST);
need_decompress_pass = true;
}
@@ -520,7 +520,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
/* Do the fast clear. */
si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
tex->cmask_offset, tex->surface.cmask_size, 0,
SI_COHERENCY_CB_META);
SI_COHERENCY_CB_META, SI_METHOD_BEST);
need_decompress_pass = true;
}

View File

@@ -215,7 +215,7 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value,
enum si_coherency coher)
enum si_coherency coher, enum si_method xfer )
{
struct radeon_winsys *ws = sctx->ws;
struct r600_resource *rdst = r600_resource(dst);
@@ -227,7 +227,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
if (!size)
return;
dma_clear_size = size & ~3ull;
dma_clear_size = size & ~3ull;
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
@@ -250,7 +250,9 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
* For example, DeusEx:MD has 21 buffer clears per frame and all
* of them are moved to SDMA thanks to this. */
!ws->cs_is_buffer_referenced(sctx->gfx_cs, rdst->buf,
RADEON_USAGE_READWRITE))) {
RADEON_USAGE_READWRITE)) &&
/* bypass sdma transfer with param xfer */
(xfer != SI_METHOD_CP_DMA)) {
sctx->dma_clear_buffer(sctx, dst, offset, dma_clear_size, value);
offset += dma_clear_size;
@@ -263,7 +265,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
/* Flush the caches. */
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
while (dma_clear_size) {
unsigned byte_count = MIN2(dma_clear_size, cp_dma_max_byte_count(sctx));
@@ -356,7 +358,7 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx,
}
si_clear_buffer(sctx, dst, offset, size, dword_value,
SI_COHERENCY_SHADER);
SI_COHERENCY_SHADER, SI_METHOD_BEST);
}
/**

View File

@@ -101,6 +101,7 @@ static const struct debug_named_value debug_options[] = {
{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
{ "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
{ "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
{ "testclearbufperf", DBG(TEST_CLEARBUF_PERF), "Test Clearbuffer Performance" },
DEBUG_NAMED_VALUE_END /* must be last */
};
@@ -545,7 +546,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
/* Clear the NULL constant buffer, because loads should return zeros. */
si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0,
sctx->null_const_buf.buffer->width0, 0,
SI_COHERENCY_SHADER);
SI_COHERENCY_SHADER, SI_METHOD_BEST);
}
uint64_t max_threads_per_block;
@@ -1069,6 +1070,10 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
if (sscreen->debug_flags & DBG(TEST_DMA))
si_test_dma(sscreen);
if (sscreen->debug_flags & DBG(TEST_CLEARBUF_PERF)) {
si_test_clearbuffer(sscreen);
}
if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) |
DBG(TEST_VMFAULT_SDMA) |
DBG(TEST_VMFAULT_SHADER)))

View File

@@ -165,6 +165,7 @@ enum {
DBG_TEST_VMFAULT_CP,
DBG_TEST_VMFAULT_SDMA,
DBG_TEST_VMFAULT_SHADER,
DBG_TEST_CLEARBUF_PERF,
};
#define DBG_ALL_SHADERS (((1 << (DBG_CS + 1)) - 1))
@@ -1110,10 +1111,15 @@ enum si_coherency {
SI_COHERENCY_CB_META,
};
enum si_method {
SI_METHOD_CP_DMA,
SI_METHOD_BEST,
};
void si_cp_dma_wait_for_idle(struct si_context *sctx);
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value,
enum si_coherency coher);
enum si_coherency coher, enum si_method xfer);
void si_copy_buffer(struct si_context *sctx,
struct pipe_resource *dst, struct pipe_resource *src,
uint64_t dst_offset, uint64_t src_offset, unsigned size,
@@ -1199,6 +1205,9 @@ void si_resume_queries(struct si_context *sctx);
/* si_test_dma.c */
void si_test_dma(struct si_screen *sscreen);
/* si_test_clearbuffer.c */
void si_test_clearbuffer(struct si_screen *sscreen);
/* si_uvd.c */
struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templ);

View File

@@ -0,0 +1,140 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
/* This file implements performance tests for the si_clear_buffer function. */
#include "si_pipe.h"
/* Smallest buffer size (bytes) used by the size-range test; it is doubled on every step. */
#define CLEARBUF_MIN 32
/* Number of measurements taken by each test (repeats for the average, steps for the range). */
#define CLEARBUF_COUNT 16
/* Buffer size (bytes) used by the averaged test. */
#define CLEARBUF_MEMSZ 1024
/**
 * @brief Time one CP DMA clear of a freshly created buffer.
 *
 * A buffer of @p memory_size bytes is created, cleared to zero with
 * si_clear_buffer(..., SI_METHOD_CP_DMA) between begin_query/end_query of a
 * PIPE_QUERY_TIME_ELAPSED query, and then released again.
 *
 * @param ctx         Context used to create the query and issue the clear
 * @param memory_size Size in bytes of the buffer to create and clear
 * @return The u64 result of the time-elapsed query (the callers treat it as
 *         nanoseconds), or 0 when the buffer or query could not be created
 *         or the query result could not be read.
 */
static uint64_t
measure_clearbuf_time(struct pipe_context *ctx,
		      uint64_t memory_size)
{
	struct pipe_query *query_te;
	union pipe_query_result qresult;
	struct pipe_resource *buf;
	struct si_context *sctx = (struct si_context*)ctx;
	struct pipe_screen *screen = ctx->screen;
	uint64_t elapsed = 0;

	buf = pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, memory_size);
	if (!buf)
		return 0;

	query_te = ctx->create_query(ctx, PIPE_QUERY_TIME_ELAPSED, 0);
	if (!query_te) {
		/* Don't leak the buffer when the query can't be created. */
		pipe_resource_reference(&buf, NULL);
		return 0;
	}

	ctx->begin_query(ctx, query_te);

	/* operation */
	si_clear_buffer(sctx, buf, 0, memory_size, 0x00,
			SI_COHERENCY_SHADER, SI_METHOD_CP_DMA);

	ctx->end_query(ctx, query_te);

	/* Only trust qresult when the driver says it filled it in. */
	if (ctx->get_query_result(ctx, query_te, true, &qresult))
		elapsed = qresult.u64;

	/* Cleanup. */
	ctx->destroy_query(ctx, query_te);
	pipe_resource_reference(&buf, NULL);

	/* Report Results */
	return elapsed;
}
/**
 * @brief Analyze rate of clearing a CLEARBUF_MEMSZ-byte buffer, averaged
 *        over CLEARBUF_COUNT iterations
 *
 * Prints the resulting rate in kB/s to stdout. If the summed time is zero
 * (no usable measurement), an error is printed to stderr instead of
 * dividing by zero.
 *
 * @param ctx Context of pipe to perform analysis on
 */
static void
analyze_clearbuf_perf_avg(struct pipe_context *ctx)
{
	unsigned index;
	uint64_t sum = 0;
	uint64_t rate_kBps;

	/* Run Tests. */
	for (index = 0; index < CLEARBUF_COUNT; index++)
		sum += measure_clearbuf_time(ctx, CLEARBUF_MEMSZ);

	if (!sum) {
		fprintf(stderr, "CP DMA clear_buffer performance: "
				"no valid time measurement\n");
		return;
	}

	/* Calculate Results. */
	/* kBps = (size(bytes))/(1000) / (time(ns)/(1000*1000*1000))
	 *      = size(bytes) * (1000*1000) / time(ns)
	 */
	rate_kBps = (uint64_t)CLEARBUF_COUNT * CLEARBUF_MEMSZ;
	rate_kBps *= 1000UL * 1000UL;
	rate_kBps /= sum;

	/* Display Results. */
	/* Cast to the exact types the conversion specifiers expect. */
	printf("CP DMA clear_buffer performance (buffer %llu ,repeat %u ):",
	       (unsigned long long)CLEARBUF_MEMSZ,
	       (unsigned)CLEARBUF_COUNT);
	printf(" %llu kB/s\n", (unsigned long long)rate_kBps);
}
/**
 * @brief Analyze rate of clearing a range of Buffer sizes
 *
 * Measures CLEARBUF_COUNT buffer sizes, starting at CLEARBUF_MIN bytes and
 * doubling each step, then prints one kB/s line per size to stdout. A size
 * whose measured time is zero is reported as "n/a" rather than dividing by
 * zero.
 *
 * @param ctx Context of pipe to perform analysis on
 */
static void
analyze_clearbuf_perf_rng(struct pipe_context *ctx)
{
	unsigned index;
	uint64_t result[CLEARBUF_COUNT];
	uint64_t mem_size;

	/* Run Tests. */
	mem_size = CLEARBUF_MIN;
	for (index = 0; index < CLEARBUF_COUNT; index++) {
		result[index] = measure_clearbuf_time(ctx, mem_size);
		mem_size <<= 1;
	}

	/* Calculate & Display Results. */
	/* kBps = (size(bytes))/(1000) / (time(ns)/(1000*1000*1000))
	 *      = size(bytes) * (1000*1000) / time(ns)
	 */
	mem_size = CLEARBUF_MIN;
	for (index = 0; index < CLEARBUF_COUNT; index++) {
		printf("CP DMA clear_buffer performance (buffer %llu):",
		       (unsigned long long)mem_size);

		if (result[index]) {
			uint64_t rate_kBps = mem_size;

			rate_kBps *= 1000UL * 1000UL;
			rate_kBps /= result[index];
			printf(" %llu kB/s\n", (unsigned long long)rate_kBps);
		} else {
			/* No usable time for this size. */
			printf(" n/a\n");
		}

		mem_size <<= 1;
	}
}
void si_test_clearbuffer(struct si_screen *sscreen)
{
struct pipe_screen *screen = &sscreen->b;
struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
analyze_clearbuf_perf_avg(ctx);
analyze_clearbuf_perf_rng(ctx);
exit(0);
}

View File

@@ -307,7 +307,8 @@ void si_test_dma(struct si_screen *sscreen)
set_random_pixels(ctx, src, &src_cpu);
/* clear dst pixels */
si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, 0, true);
si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, 0,
true, SI_METHOD_BEST);
memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
/* preparation */