radeonsi: add new R600_DEBUG test "testclearbufperf"
Signed-off-by: Darren Powell <darren.powell@amd.com> Signed-off-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:

committed by
Marek Olšák

parent
977638006b
commit
726a48c94f
@@ -43,6 +43,7 @@ C_SOURCES := \
|
|||||||
si_state_streamout.c \
|
si_state_streamout.c \
|
||||||
si_state_viewport.c \
|
si_state_viewport.c \
|
||||||
si_state.h \
|
si_state.h \
|
||||||
|
si_test_clearbuffer.c \
|
||||||
si_test_dma.c \
|
si_test_dma.c \
|
||||||
si_texture.c \
|
si_texture.c \
|
||||||
si_uvd.c \
|
si_uvd.c \
|
||||||
|
@@ -59,6 +59,7 @@ files_libradeonsi = files(
|
|||||||
'si_state_shaders.c',
|
'si_state_shaders.c',
|
||||||
'si_state_streamout.c',
|
'si_state_streamout.c',
|
||||||
'si_state_viewport.c',
|
'si_state_viewport.c',
|
||||||
|
'si_test_clearbuffer.c',
|
||||||
'si_test_dma.c',
|
'si_test_dma.c',
|
||||||
'si_texture.c',
|
'si_texture.c',
|
||||||
'si_uvd.c',
|
'si_uvd.c',
|
||||||
|
@@ -256,7 +256,7 @@ void vi_dcc_clear_level(struct si_context *sctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,
|
si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,
|
||||||
clear_value, SI_COHERENCY_CB_META);
|
clear_value, SI_COHERENCY_CB_META, SI_METHOD_BEST);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set the same micro tile mode as the destination of the last MSAA resolve.
|
/* Set the same micro tile mode as the destination of the last MSAA resolve.
|
||||||
@@ -489,7 +489,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
|
|||||||
|
|
||||||
si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
|
si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
|
||||||
tex->cmask_offset, tex->surface.cmask_size,
|
tex->cmask_offset, tex->surface.cmask_size,
|
||||||
0xCCCCCCCC, SI_COHERENCY_CB_META);
|
0xCCCCCCCC, SI_COHERENCY_CB_META, SI_METHOD_BEST);
|
||||||
need_decompress_pass = true;
|
need_decompress_pass = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -520,7 +520,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
|
|||||||
/* Do the fast clear. */
|
/* Do the fast clear. */
|
||||||
si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
|
si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
|
||||||
tex->cmask_offset, tex->surface.cmask_size, 0,
|
tex->cmask_offset, tex->surface.cmask_size, 0,
|
||||||
SI_COHERENCY_CB_META);
|
SI_COHERENCY_CB_META, SI_METHOD_BEST);
|
||||||
need_decompress_pass = true;
|
need_decompress_pass = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -215,7 +215,7 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
|
|||||||
|
|
||||||
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
||||||
uint64_t offset, uint64_t size, unsigned value,
|
uint64_t offset, uint64_t size, unsigned value,
|
||||||
enum si_coherency coher)
|
enum si_coherency coher, enum si_method xfer )
|
||||||
{
|
{
|
||||||
struct radeon_winsys *ws = sctx->ws;
|
struct radeon_winsys *ws = sctx->ws;
|
||||||
struct r600_resource *rdst = r600_resource(dst);
|
struct r600_resource *rdst = r600_resource(dst);
|
||||||
@@ -227,7 +227,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
|||||||
if (!size)
|
if (!size)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
dma_clear_size = size & ~3ull;
|
dma_clear_size = size & ~3ull;
|
||||||
|
|
||||||
/* Mark the buffer range of destination as valid (initialized),
|
/* Mark the buffer range of destination as valid (initialized),
|
||||||
* so that transfer_map knows it should wait for the GPU when mapping
|
* so that transfer_map knows it should wait for the GPU when mapping
|
||||||
@@ -250,7 +250,9 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
|||||||
* For example, DeusEx:MD has 21 buffer clears per frame and all
|
* For example, DeusEx:MD has 21 buffer clears per frame and all
|
||||||
* of them are moved to SDMA thanks to this. */
|
* of them are moved to SDMA thanks to this. */
|
||||||
!ws->cs_is_buffer_referenced(sctx->gfx_cs, rdst->buf,
|
!ws->cs_is_buffer_referenced(sctx->gfx_cs, rdst->buf,
|
||||||
RADEON_USAGE_READWRITE))) {
|
RADEON_USAGE_READWRITE)) &&
|
||||||
|
/* bypass sdma transfer with param xfer */
|
||||||
|
(xfer != SI_METHOD_CP_DMA)) {
|
||||||
sctx->dma_clear_buffer(sctx, dst, offset, dma_clear_size, value);
|
sctx->dma_clear_buffer(sctx, dst, offset, dma_clear_size, value);
|
||||||
|
|
||||||
offset += dma_clear_size;
|
offset += dma_clear_size;
|
||||||
@@ -263,7 +265,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
|||||||
|
|
||||||
/* Flush the caches. */
|
/* Flush the caches. */
|
||||||
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
|
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
|
||||||
SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
|
SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
|
||||||
|
|
||||||
while (dma_clear_size) {
|
while (dma_clear_size) {
|
||||||
unsigned byte_count = MIN2(dma_clear_size, cp_dma_max_byte_count(sctx));
|
unsigned byte_count = MIN2(dma_clear_size, cp_dma_max_byte_count(sctx));
|
||||||
@@ -356,7 +358,7 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
si_clear_buffer(sctx, dst, offset, size, dword_value,
|
si_clear_buffer(sctx, dst, offset, size, dword_value,
|
||||||
SI_COHERENCY_SHADER);
|
SI_COHERENCY_SHADER, SI_METHOD_BEST);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -101,6 +101,7 @@ static const struct debug_named_value debug_options[] = {
|
|||||||
{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
|
{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
|
||||||
{ "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
|
{ "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
|
||||||
{ "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
|
{ "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
|
||||||
|
{ "testclearbufperf", DBG(TEST_CLEARBUF_PERF), "Test Clearbuffer Performance" },
|
||||||
|
|
||||||
DEBUG_NAMED_VALUE_END /* must be last */
|
DEBUG_NAMED_VALUE_END /* must be last */
|
||||||
};
|
};
|
||||||
@@ -545,7 +546,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
|||||||
/* Clear the NULL constant buffer, because loads should return zeros. */
|
/* Clear the NULL constant buffer, because loads should return zeros. */
|
||||||
si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0,
|
si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0,
|
||||||
sctx->null_const_buf.buffer->width0, 0,
|
sctx->null_const_buf.buffer->width0, 0,
|
||||||
SI_COHERENCY_SHADER);
|
SI_COHERENCY_SHADER, SI_METHOD_BEST);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t max_threads_per_block;
|
uint64_t max_threads_per_block;
|
||||||
@@ -1069,6 +1070,10 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
|
|||||||
if (sscreen->debug_flags & DBG(TEST_DMA))
|
if (sscreen->debug_flags & DBG(TEST_DMA))
|
||||||
si_test_dma(sscreen);
|
si_test_dma(sscreen);
|
||||||
|
|
||||||
|
if (sscreen->debug_flags & DBG(TEST_CLEARBUF_PERF)) {
|
||||||
|
si_test_clearbuffer(sscreen);
|
||||||
|
}
|
||||||
|
|
||||||
if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) |
|
if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) |
|
||||||
DBG(TEST_VMFAULT_SDMA) |
|
DBG(TEST_VMFAULT_SDMA) |
|
||||||
DBG(TEST_VMFAULT_SHADER)))
|
DBG(TEST_VMFAULT_SHADER)))
|
||||||
|
@@ -165,6 +165,7 @@ enum {
|
|||||||
DBG_TEST_VMFAULT_CP,
|
DBG_TEST_VMFAULT_CP,
|
||||||
DBG_TEST_VMFAULT_SDMA,
|
DBG_TEST_VMFAULT_SDMA,
|
||||||
DBG_TEST_VMFAULT_SHADER,
|
DBG_TEST_VMFAULT_SHADER,
|
||||||
|
DBG_TEST_CLEARBUF_PERF,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define DBG_ALL_SHADERS (((1 << (DBG_CS + 1)) - 1))
|
#define DBG_ALL_SHADERS (((1 << (DBG_CS + 1)) - 1))
|
||||||
@@ -1110,10 +1111,15 @@ enum si_coherency {
|
|||||||
SI_COHERENCY_CB_META,
|
SI_COHERENCY_CB_META,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Selects how si_clear_buffer is allowed to perform a clear. */
enum si_method {
|
||||||
|
/* Skip the SDMA clear path in si_clear_buffer. */
SI_METHOD_CP_DMA,
|
||||||
|
/* Do not restrict the path; si_clear_buffer may still use SDMA
 * when its other conditions are met. */
SI_METHOD_BEST,
|
||||||
|
};
|
||||||
|
|
||||||
void si_cp_dma_wait_for_idle(struct si_context *sctx);
|
void si_cp_dma_wait_for_idle(struct si_context *sctx);
|
||||||
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
||||||
uint64_t offset, uint64_t size, unsigned value,
|
uint64_t offset, uint64_t size, unsigned value,
|
||||||
enum si_coherency coher);
|
enum si_coherency coher, enum si_method xfer);
|
||||||
void si_copy_buffer(struct si_context *sctx,
|
void si_copy_buffer(struct si_context *sctx,
|
||||||
struct pipe_resource *dst, struct pipe_resource *src,
|
struct pipe_resource *dst, struct pipe_resource *src,
|
||||||
uint64_t dst_offset, uint64_t src_offset, unsigned size,
|
uint64_t dst_offset, uint64_t src_offset, unsigned size,
|
||||||
@@ -1199,6 +1205,9 @@ void si_resume_queries(struct si_context *sctx);
|
|||||||
/* si_test_dma.c */
|
/* si_test_dma.c */
|
||||||
void si_test_dma(struct si_screen *sscreen);
|
void si_test_dma(struct si_screen *sscreen);
|
||||||
|
|
||||||
|
/* si_test_clearbuffer.c */
|
||||||
|
void si_test_clearbuffer(struct si_screen *sscreen);
|
||||||
|
|
||||||
/* si_uvd.c */
|
/* si_uvd.c */
|
||||||
struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
|
struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
|
||||||
const struct pipe_video_codec *templ);
|
const struct pipe_video_codec *templ);
|
||||||
|
140
src/gallium/drivers/radeonsi/si_test_clearbuffer.c
Normal file
140
src/gallium/drivers/radeonsi/si_test_clearbuffer.c
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This file implements performance tests for the si_clear_buffer function. */
|
||||||
|
|
||||||
|
#include "si_pipe.h"
|
||||||
|
|
||||||
|
/* Smallest buffer size, in bytes, used by the size-range test; the size
 * doubles on every iteration. */
#define CLEARBUF_MIN 32
|
||||||
|
/* Number of measurements taken by each test. */
#define CLEARBUF_COUNT 16
|
||||||
|
/* Buffer size, in bytes, used by the averaging test. */
#define CLEARBUF_MEMSZ 1024
|
||||||
|
|
||||||
|
static uint64_t
|
||||||
|
measure_clearbuf_time(struct pipe_context *ctx,
|
||||||
|
uint64_t memory_size)
|
||||||
|
{
|
||||||
|
struct pipe_query *query_te;
|
||||||
|
union pipe_query_result qresult;
|
||||||
|
struct pipe_resource *buf;
|
||||||
|
|
||||||
|
struct si_context *sctx = (struct si_context*)ctx;
|
||||||
|
struct pipe_screen *screen = ctx->screen;
|
||||||
|
|
||||||
|
buf = pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, memory_size);
|
||||||
|
|
||||||
|
query_te = ctx->create_query(ctx, PIPE_QUERY_TIME_ELAPSED, 0);
|
||||||
|
|
||||||
|
ctx->begin_query(ctx, query_te);
|
||||||
|
/* operation */
|
||||||
|
si_clear_buffer(sctx, buf, 0, memory_size, 0x00,
|
||||||
|
SI_COHERENCY_SHADER, SI_METHOD_CP_DMA);
|
||||||
|
ctx->end_query(ctx, query_te);
|
||||||
|
ctx->get_query_result(ctx, query_te, true, &qresult);
|
||||||
|
|
||||||
|
/* Cleanup. */
|
||||||
|
ctx->destroy_query(ctx, query_te);
|
||||||
|
pipe_resource_reference(&buf, NULL);
|
||||||
|
|
||||||
|
/* Report Results */
|
||||||
|
return qresult.u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Analyze rate of clearing a 1K Buffer averaged over 16 iterations
|
||||||
|
* @param ctx Context of pipe to perform analysis on
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
analyze_clearbuf_perf_avg(struct pipe_context *ctx)
|
||||||
|
{
|
||||||
|
uint index = 0;
|
||||||
|
uint64_t result[CLEARBUF_COUNT];
|
||||||
|
uint64_t sum = 0;
|
||||||
|
long long int rate_kBps;
|
||||||
|
|
||||||
|
/* Run Tests. */
|
||||||
|
for (index = 0 ; index < CLEARBUF_COUNT ; index++) {
|
||||||
|
result[index] = measure_clearbuf_time(ctx, CLEARBUF_MEMSZ);
|
||||||
|
sum += result[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Calculate Results. */
|
||||||
|
/* kBps = (size(bytes))/(1000) / (time(ns)/(1000*1000*1000)) */
|
||||||
|
rate_kBps = CLEARBUF_COUNT*CLEARBUF_MEMSZ;
|
||||||
|
rate_kBps *= 1000UL*1000UL;
|
||||||
|
rate_kBps /= sum;
|
||||||
|
|
||||||
|
/* Display Results. */
|
||||||
|
printf("CP DMA clear_buffer performance (buffer %lu ,repeat %u ):",
|
||||||
|
(uint64_t)CLEARBUF_MEMSZ,
|
||||||
|
CLEARBUF_COUNT );
|
||||||
|
printf(" %llu kB/s\n", rate_kBps );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Analyze rate of clearing a range of Buffer sizes
|
||||||
|
* @param ctx Context of pipe to perform analysis on
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
analyze_clearbuf_perf_rng(struct pipe_context *ctx)
|
||||||
|
{
|
||||||
|
uint index = 0;
|
||||||
|
uint64_t result[CLEARBUF_COUNT];
|
||||||
|
uint64_t mem_size;
|
||||||
|
long long int rate_kBps;
|
||||||
|
|
||||||
|
/* Run Tests. */
|
||||||
|
mem_size = CLEARBUF_MIN;
|
||||||
|
for (index = 0 ; index < CLEARBUF_COUNT ; index++ ) {
|
||||||
|
result[index] = measure_clearbuf_time(ctx, mem_size);
|
||||||
|
mem_size <<= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Calculate & Display Results. */
|
||||||
|
/* kBps = (size(bytes))/(1000) / (time(ns)/(1000*1000*1000)) */
|
||||||
|
mem_size = CLEARBUF_MIN;
|
||||||
|
for (index = 0 ; index < CLEARBUF_COUNT ; index++ ) {
|
||||||
|
rate_kBps = mem_size;
|
||||||
|
rate_kBps *= 1000UL*1000UL;
|
||||||
|
rate_kBps /= result[index];
|
||||||
|
|
||||||
|
printf("CP DMA clear_buffer performance (buffer %lu):",
|
||||||
|
mem_size );
|
||||||
|
printf(" %llu kB/s\n", rate_kBps );
|
||||||
|
|
||||||
|
mem_size <<= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void si_test_clearbuffer(struct si_screen *sscreen)
|
||||||
|
{
|
||||||
|
struct pipe_screen *screen = &sscreen->b;
|
||||||
|
struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
|
||||||
|
|
||||||
|
analyze_clearbuf_perf_avg(ctx);
|
||||||
|
analyze_clearbuf_perf_rng(ctx);
|
||||||
|
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
@@ -307,7 +307,8 @@ void si_test_dma(struct si_screen *sscreen)
|
|||||||
set_random_pixels(ctx, src, &src_cpu);
|
set_random_pixels(ctx, src, &src_cpu);
|
||||||
|
|
||||||
/* clear dst pixels */
|
/* clear dst pixels */
|
||||||
si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, 0, true);
|
si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, 0,
|
||||||
|
true, SI_METHOD_BEST);
|
||||||
memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
|
memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
|
||||||
|
|
||||||
/* preparation */
|
/* preparation */
|
||||||
|
Reference in New Issue
Block a user