gallium: distinguish between shader IR in get_compute_param
For radeonsi, native and TGSI use different compilers, and this results in different limits for different IRs. The set we strictly need for radeonsi is only the MAX_BLOCK_SIZE and MAX_THREADS_PER_BLOCK params, but I added a few others as shader related that seemed like they would also typically depend on the compiler. Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Reviewed-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
@@ -436,26 +436,26 @@ pipe_screen::get_compute_param.
|
||||
``processor-arch-manufacturer-os`` that will be passed on to the compiler.
|
||||
This CAP is only relevant for drivers that specify PIPE_SHADER_IR_LLVM
|
||||
or PIPE_SHADER_IR_NATIVE for their preferred IR.
|
||||
Value type: null-terminated string.
|
||||
Value type: null-terminated string. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions
|
||||
for grid and block coordinates. Value type: ``uint64_t``.
|
||||
for grid and block coordinates. Value type: ``uint64_t``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block
|
||||
units. Value type: ``uint64_t []``.
|
||||
units. Value type: ``uint64_t []``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread
|
||||
units. Value type: ``uint64_t []``.
|
||||
units. Value type: ``uint64_t []``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK``: Maximum number of threads that
|
||||
a single block can contain. Value type: ``uint64_t``.
|
||||
a single block can contain. Value type: ``uint64_t``. Shader IR type dependent.
|
||||
This may be less than the product of the components of MAX_BLOCK_SIZE and is
|
||||
usually limited by the number of threads that can be resident simultaneously
|
||||
on a compute unit.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL
|
||||
resource. Value type: ``uint64_t``.
|
||||
resource. Value type: ``uint64_t``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL
|
||||
resource. Value type: ``uint64_t``.
|
||||
resource. Value type: ``uint64_t``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE
|
||||
resource. Value type: ``uint64_t``.
|
||||
resource. Value type: ``uint64_t``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT
|
||||
resource. Value type: ``uint64_t``.
|
||||
resource. Value type: ``uint64_t``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE``: Maximum size of a memory object
|
||||
allocation in bytes. Value type: ``uint64_t``.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY``: Maximum frequency of the GPU
|
||||
|
@@ -179,6 +179,7 @@ ilo_get_video_param(struct pipe_screen *screen,
|
||||
|
||||
static int
|
||||
ilo_get_compute_param(struct pipe_screen *screen,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param,
|
||||
void *ret)
|
||||
{
|
||||
|
@@ -368,6 +368,7 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
|
||||
|
||||
static int
|
||||
nv50_screen_get_compute_param(struct pipe_screen *pscreen,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param, void *data)
|
||||
{
|
||||
struct nv50_screen *screen = nv50_screen(pscreen);
|
||||
|
@@ -395,6 +395,7 @@ nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
|
||||
|
||||
static int
|
||||
nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param, void *data)
|
||||
{
|
||||
struct nvc0_screen *screen = nvc0_screen(pscreen);
|
||||
|
@@ -499,7 +499,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
|
||||
if (shader == PIPE_SHADER_COMPUTE) {
|
||||
uint64_t max_const_buffer_size;
|
||||
pscreen->get_compute_param(pscreen,
|
||||
pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
|
||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
||||
&max_const_buffer_size);
|
||||
return max_const_buffer_size;
|
||||
|
@@ -612,6 +612,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
|
||||
}
|
||||
|
||||
static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param,
|
||||
void *ret)
|
||||
{
|
||||
@@ -678,7 +679,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
uint64_t *max_global_size = ret;
|
||||
uint64_t max_mem_alloc_size;
|
||||
|
||||
r600_get_compute_param(screen,
|
||||
r600_get_compute_param(screen, ir_type,
|
||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
||||
&max_mem_alloc_size);
|
||||
|
||||
|
@@ -467,7 +467,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
|
||||
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
|
||||
uint64_t max_const_buffer_size;
|
||||
pscreen->get_compute_param(pscreen,
|
||||
pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
|
||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
||||
&max_const_buffer_size);
|
||||
return max_const_buffer_size;
|
||||
|
@@ -175,6 +175,7 @@ trace_screen_get_paramf(struct pipe_screen *_screen,
|
||||
|
||||
static int
|
||||
trace_screen_get_compute_param(struct pipe_screen *_screen,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param, void *data)
|
||||
{
|
||||
struct trace_screen *tr_scr = trace_screen(_screen);
|
||||
@@ -184,10 +185,11 @@ trace_screen_get_compute_param(struct pipe_screen *_screen,
|
||||
trace_dump_call_begin("pipe_screen", "get_compute_param");
|
||||
|
||||
trace_dump_arg(ptr, screen);
|
||||
trace_dump_arg(int, ir_type);
|
||||
trace_dump_arg(int, param);
|
||||
trace_dump_arg(ptr, data);
|
||||
|
||||
result = screen->get_compute_param(screen, param, data);
|
||||
result = screen->get_compute_param(screen, ir_type, param, data);
|
||||
|
||||
trace_dump_ret(int, result);
|
||||
|
||||
|
@@ -109,6 +109,8 @@ struct pipe_screen {
|
||||
|
||||
/**
|
||||
* Query a compute-specific capability/parameter/limit.
|
||||
* \param ir_type shader IR type for which the param applies, or don't care
|
||||
* if the param is not shader related
|
||||
* \param param one of PIPE_COMPUTE_CAP_x
|
||||
* \param ret pointer to a preallocated buffer that will be
|
||||
* initialized to the parameter value, or NULL.
|
||||
@@ -116,6 +118,7 @@ struct pipe_screen {
|
||||
* returned.
|
||||
*/
|
||||
int (*get_compute_param)(struct pipe_screen *,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param,
|
||||
void *ret);
|
||||
|
||||
|
@@ -30,11 +30,12 @@ using namespace clover;
|
||||
namespace {
|
||||
template<typename T>
|
||||
std::vector<T>
|
||||
get_compute_param(pipe_screen *pipe, pipe_compute_cap cap) {
|
||||
int sz = pipe->get_compute_param(pipe, cap, NULL);
|
||||
get_compute_param(pipe_screen *pipe, pipe_shader_ir ir_format,
|
||||
pipe_compute_cap cap) {
|
||||
int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL);
|
||||
std::vector<T> v(sz / sizeof(T));
|
||||
|
||||
pipe->get_compute_param(pipe, cap, &v.front());
|
||||
pipe->get_compute_param(pipe, ir_format, cap, &v.front());
|
||||
return v;
|
||||
}
|
||||
}
|
||||
@@ -115,19 +116,19 @@ device::max_samplers() const {
|
||||
|
||||
cl_ulong
|
||||
device::max_mem_global() const {
|
||||
return get_compute_param<uint64_t>(pipe,
|
||||
return get_compute_param<uint64_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
|
||||
}
|
||||
|
||||
cl_ulong
|
||||
device::max_mem_local() const {
|
||||
return get_compute_param<uint64_t>(pipe,
|
||||
return get_compute_param<uint64_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
|
||||
}
|
||||
|
||||
cl_ulong
|
||||
device::max_mem_input() const {
|
||||
return get_compute_param<uint64_t>(pipe,
|
||||
return get_compute_param<uint64_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
|
||||
}
|
||||
|
||||
@@ -146,30 +147,30 @@ device::max_const_buffers() const {
|
||||
size_t
|
||||
device::max_threads_per_block() const {
|
||||
return get_compute_param<uint64_t>(
|
||||
pipe, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
|
||||
pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
|
||||
}
|
||||
|
||||
cl_ulong
|
||||
device::max_mem_alloc_size() const {
|
||||
return get_compute_param<uint64_t>(pipe,
|
||||
return get_compute_param<uint64_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];
|
||||
}
|
||||
|
||||
cl_uint
|
||||
device::max_clock_frequency() const {
|
||||
return get_compute_param<uint32_t>(pipe,
|
||||
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
|
||||
}
|
||||
|
||||
cl_uint
|
||||
device::max_compute_units() const {
|
||||
return get_compute_param<uint32_t>(pipe,
|
||||
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
|
||||
}
|
||||
|
||||
bool
|
||||
device::image_support() const {
|
||||
return get_compute_param<uint32_t>(pipe,
|
||||
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
|
||||
}
|
||||
|
||||
@@ -181,13 +182,15 @@ device::has_doubles() const {
|
||||
|
||||
std::vector<size_t>
|
||||
device::max_block_size() const {
|
||||
auto v = get_compute_param<uint64_t>(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
|
||||
auto v = get_compute_param<uint64_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
|
||||
return { v.begin(), v.end() };
|
||||
}
|
||||
|
||||
cl_uint
|
||||
device::subgroup_size() const {
|
||||
return get_compute_param<uint32_t>(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
|
||||
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
|
||||
}
|
||||
|
||||
std::string
|
||||
@@ -209,7 +212,7 @@ device::ir_format() const {
|
||||
std::string
|
||||
device::ir_target() const {
|
||||
std::vector<char> target = get_compute_param<char>(
|
||||
pipe, PIPE_COMPUTE_CAP_IR_TARGET);
|
||||
pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET);
|
||||
return { target.data() };
|
||||
}
|
||||
|
||||
|
@@ -58,7 +58,9 @@ struct context {
|
||||
uint64_t __v[4]; \
|
||||
int __i, __n; \
|
||||
\
|
||||
__n = ctx->screen->get_compute_param(ctx->screen, c, __v); \
|
||||
__n = ctx->screen->get_compute_param(ctx->screen, \
|
||||
PIPE_SHADER_IR_TGSI, \
|
||||
c, __v); \
|
||||
printf("%s: {", #c); \
|
||||
\
|
||||
for (__i = 0; __i < __n / sizeof(*__v); ++__i) \
|
||||
|
@@ -1124,14 +1124,15 @@ void st_init_extensions(struct pipe_screen *screen,
|
||||
if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
|
||||
uint64_t grid_size[3], block_size[3];
|
||||
|
||||
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE,
|
||||
grid_size);
|
||||
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,
|
||||
block_size);
|
||||
screen->get_compute_param(screen,
|
||||
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
|
||||
PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid_size);
|
||||
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
|
||||
PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, block_size);
|
||||
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
|
||||
PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
|
||||
&consts->MaxComputeWorkGroupInvocations);
|
||||
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
|
||||
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
|
||||
PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
|
||||
&consts->MaxComputeSharedMemorySize);
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
|
Reference in New Issue
Block a user