gallium: distinguish between shader IR in get_compute_param

For radeonsi, the native and TGSI IRs use different compilers, and this
results in different limits for the different IRs.

The set we strictly need for radeonsi is only the MAX_BLOCK_SIZE
and MAX_THREADS_PER_BLOCK params, but I also marked a few others as
shader related, since they seemed like they would typically depend on
the compiler as well.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Bas Nieuwenhuizen
2016-03-25 02:06:50 +01:00
parent be5899dcf9
commit 1a5c8c24b5
12 changed files with 54 additions and 39 deletions

View File

@@ -436,26 +436,26 @@ pipe_screen::get_compute_param.
``processor-arch-manufacturer-os`` that will be passed on to the compiler.
This CAP is only relevant for drivers that specify PIPE_SHADER_IR_LLVM
or PIPE_SHADER_IR_NATIVE for their preferred IR.
Value type: null-terminated string.
Value type: null-terminated string. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions
for grid and block coordinates. Value type: ``uint64_t``.
for grid and block coordinates. Value type: ``uint64_t``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block
units. Value type: ``uint64_t []``.
units. Value type: ``uint64_t []``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread
units. Value type: ``uint64_t []``.
units. Value type: ``uint64_t []``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK``: Maximum number of threads that
a single block can contain. Value type: ``uint64_t``.
a single block can contain. Value type: ``uint64_t``. Shader IR type dependent.
This may be less than the product of the components of MAX_BLOCK_SIZE and is
usually limited by the number of threads that can be resident simultaneously
on a compute unit.
* ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL
resource. Value type: ``uint64_t``.
resource. Value type: ``uint64_t``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL
resource. Value type: ``uint64_t``.
resource. Value type: ``uint64_t``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE
resource. Value type: ``uint64_t``.
resource. Value type: ``uint64_t``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT
resource. Value type: ``uint64_t``.
resource. Value type: ``uint64_t``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE``: Maximum size of a memory object
allocation in bytes. Value type: ``uint64_t``.
* ``PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY``: Maximum frequency of the GPU

View File

@@ -179,6 +179,7 @@ ilo_get_video_param(struct pipe_screen *screen,
static int
ilo_get_compute_param(struct pipe_screen *screen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param,
void *ret)
{

View File

@@ -368,6 +368,7 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
static int
nv50_screen_get_compute_param(struct pipe_screen *pscreen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param, void *data)
{
struct nv50_screen *screen = nv50_screen(pscreen);

View File

@@ -395,6 +395,7 @@ nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
static int
nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param, void *data)
{
struct nvc0_screen *screen = nvc0_screen(pscreen);

View File

@@ -499,7 +499,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
if (shader == PIPE_SHADER_COMPUTE) {
uint64_t max_const_buffer_size;
pscreen->get_compute_param(pscreen,
pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
&max_const_buffer_size);
return max_const_buffer_size;

View File

@@ -612,6 +612,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
}
static int r600_get_compute_param(struct pipe_screen *screen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param,
void *ret)
{
@@ -678,7 +679,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
uint64_t *max_global_size = ret;
uint64_t max_mem_alloc_size;
r600_get_compute_param(screen,
r600_get_compute_param(screen, ir_type,
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
&max_mem_alloc_size);

View File

@@ -467,7 +467,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
uint64_t max_const_buffer_size;
pscreen->get_compute_param(pscreen,
pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
&max_const_buffer_size);
return max_const_buffer_size;

View File

@@ -175,6 +175,7 @@ trace_screen_get_paramf(struct pipe_screen *_screen,
static int
trace_screen_get_compute_param(struct pipe_screen *_screen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param, void *data)
{
struct trace_screen *tr_scr = trace_screen(_screen);
@@ -184,10 +185,11 @@ trace_screen_get_compute_param(struct pipe_screen *_screen,
trace_dump_call_begin("pipe_screen", "get_compute_param");
trace_dump_arg(ptr, screen);
trace_dump_arg(int, ir_type);
trace_dump_arg(int, param);
trace_dump_arg(ptr, data);
result = screen->get_compute_param(screen, param, data);
result = screen->get_compute_param(screen, ir_type, param, data);
trace_dump_ret(int, result);

View File

@@ -109,6 +109,8 @@ struct pipe_screen {
/**
* Query a compute-specific capability/parameter/limit.
* \param ir_type shader IR type for which the param applies, or any value
* (don't care) if the param is not shader related
* \param param one of PIPE_COMPUTE_CAP_x
* \param ret pointer to a preallocated buffer that will be
* initialized to the parameter value, or NULL.
@@ -116,6 +118,7 @@ struct pipe_screen {
* returned.
*/
int (*get_compute_param)(struct pipe_screen *,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param,
void *ret);

View File

@@ -30,11 +30,12 @@ using namespace clover;
namespace {
template<typename T>
std::vector<T>
get_compute_param(pipe_screen *pipe, pipe_compute_cap cap) {
int sz = pipe->get_compute_param(pipe, cap, NULL);
get_compute_param(pipe_screen *pipe, pipe_shader_ir ir_format,
pipe_compute_cap cap) {
int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL);
std::vector<T> v(sz / sizeof(T));
pipe->get_compute_param(pipe, cap, &v.front());
pipe->get_compute_param(pipe, ir_format, cap, &v.front());
return v;
}
}
@@ -115,19 +116,19 @@ device::max_samplers() const {
cl_ulong
device::max_mem_global() const {
return get_compute_param<uint64_t>(pipe,
return get_compute_param<uint64_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
}
cl_ulong
device::max_mem_local() const {
return get_compute_param<uint64_t>(pipe,
return get_compute_param<uint64_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
}
cl_ulong
device::max_mem_input() const {
return get_compute_param<uint64_t>(pipe,
return get_compute_param<uint64_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
}
@@ -146,30 +147,30 @@ device::max_const_buffers() const {
size_t
device::max_threads_per_block() const {
return get_compute_param<uint64_t>(
pipe, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
}
cl_ulong
device::max_mem_alloc_size() const {
return get_compute_param<uint64_t>(pipe,
return get_compute_param<uint64_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];
}
cl_uint
device::max_clock_frequency() const {
return get_compute_param<uint32_t>(pipe,
return get_compute_param<uint32_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
}
cl_uint
device::max_compute_units() const {
return get_compute_param<uint32_t>(pipe,
return get_compute_param<uint32_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
}
bool
device::image_support() const {
return get_compute_param<uint32_t>(pipe,
return get_compute_param<uint32_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
}
@@ -181,13 +182,15 @@ device::has_doubles() const {
std::vector<size_t>
device::max_block_size() const {
auto v = get_compute_param<uint64_t>(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
auto v = get_compute_param<uint64_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
return { v.begin(), v.end() };
}
cl_uint
device::subgroup_size() const {
return get_compute_param<uint32_t>(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
return get_compute_param<uint32_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
}
std::string
@@ -209,7 +212,7 @@ device::ir_format() const {
std::string
device::ir_target() const {
std::vector<char> target = get_compute_param<char>(
pipe, PIPE_COMPUTE_CAP_IR_TARGET);
pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET);
return { target.data() };
}

View File

@@ -58,7 +58,9 @@ struct context {
uint64_t __v[4]; \
int __i, __n; \
\
__n = ctx->screen->get_compute_param(ctx->screen, c, __v); \
__n = ctx->screen->get_compute_param(ctx->screen, \
PIPE_SHADER_IR_TGSI, \
c, __v); \
printf("%s: {", #c); \
\
for (__i = 0; __i < __n / sizeof(*__v); ++__i) \

View File

@@ -1124,14 +1124,15 @@ void st_init_extensions(struct pipe_screen *screen,
if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
uint64_t grid_size[3], block_size[3];
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE,
grid_size);
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,
block_size);
screen->get_compute_param(screen,
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid_size);
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, block_size);
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
&consts->MaxComputeWorkGroupInvocations);
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
&consts->MaxComputeSharedMemorySize);
for (i = 0; i < 3; i++) {