gallium: distinguish between shader IR in get_compute_param
For radeonsi, the native and TGSI IRs use different compilers, which results in different limits for different IRs. The set we strictly need for radeonsi is only the MAX_BLOCK_SIZE and MAX_THREADS_PER_BLOCK params, but I also marked a few others as shader related because they seemed likely to depend on the compiler as well. Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Reviewed-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
@@ -436,26 +436,26 @@ pipe_screen::get_compute_param.
|
|||||||
``processor-arch-manufacturer-os`` that will be passed on to the compiler.
|
``processor-arch-manufacturer-os`` that will be passed on to the compiler.
|
||||||
This CAP is only relevant for drivers that specify PIPE_SHADER_IR_LLVM
|
This CAP is only relevant for drivers that specify PIPE_SHADER_IR_LLVM
|
||||||
or PIPE_SHADER_IR_NATIVE for their preferred IR.
|
or PIPE_SHADER_IR_NATIVE for their preferred IR.
|
||||||
Value type: null-terminated string.
|
Value type: null-terminated string. Shader IR type dependent.
|
||||||
* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions
|
* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions
|
||||||
for grid and block coordinates. Value type: ``uint64_t``.
|
for grid and block coordinates. Value type: ``uint64_t``. Shader IR type dependent.
|
||||||
* ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block
|
* ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block
|
||||||
units. Value type: ``uint64_t []``.
|
units. Value type: ``uint64_t []``. Shader IR type dependent.
|
||||||
* ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread
|
* ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread
|
||||||
units. Value type: ``uint64_t []``.
|
units. Value type: ``uint64_t []``. Shader IR type dependent.
|
||||||
* ``PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK``: Maximum number of threads that
|
* ``PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK``: Maximum number of threads that
|
||||||
a single block can contain. Value type: ``uint64_t``.
|
a single block can contain. Value type: ``uint64_t``. Shader IR type dependent.
|
||||||
This may be less than the product of the components of MAX_BLOCK_SIZE and is
|
This may be less than the product of the components of MAX_BLOCK_SIZE and is
|
||||||
usually limited by the number of threads that can be resident simultaneously
|
usually limited by the number of threads that can be resident simultaneously
|
||||||
on a compute unit.
|
on a compute unit.
|
||||||
* ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL
|
* ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL
|
||||||
resource. Value type: ``uint64_t``.
|
resource. Value type: ``uint64_t``. Shader IR type dependent.
|
||||||
* ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL
|
* ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL
|
||||||
resource. Value type: ``uint64_t``.
|
resource. Value type: ``uint64_t``. Shader IR type dependent.
|
||||||
* ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE
|
* ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE
|
||||||
resource. Value type: ``uint64_t``.
|
resource. Value type: ``uint64_t``. Shader IR type dependent.
|
||||||
* ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT
|
* ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT
|
||||||
resource. Value type: ``uint64_t``.
|
resource. Value type: ``uint64_t``. Shader IR type dependent.
|
||||||
* ``PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE``: Maximum size of a memory object
|
* ``PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE``: Maximum size of a memory object
|
||||||
allocation in bytes. Value type: ``uint64_t``.
|
allocation in bytes. Value type: ``uint64_t``.
|
||||||
* ``PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY``: Maximum frequency of the GPU
|
* ``PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY``: Maximum frequency of the GPU
|
||||||
|
@@ -179,6 +179,7 @@ ilo_get_video_param(struct pipe_screen *screen,
|
|||||||
|
|
||||||
static int
|
static int
|
||||||
ilo_get_compute_param(struct pipe_screen *screen,
|
ilo_get_compute_param(struct pipe_screen *screen,
|
||||||
|
enum pipe_shader_ir ir_type,
|
||||||
enum pipe_compute_cap param,
|
enum pipe_compute_cap param,
|
||||||
void *ret)
|
void *ret)
|
||||||
{
|
{
|
||||||
|
@@ -368,6 +368,7 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
|
|||||||
|
|
||||||
static int
|
static int
|
||||||
nv50_screen_get_compute_param(struct pipe_screen *pscreen,
|
nv50_screen_get_compute_param(struct pipe_screen *pscreen,
|
||||||
|
enum pipe_shader_ir ir_type,
|
||||||
enum pipe_compute_cap param, void *data)
|
enum pipe_compute_cap param, void *data)
|
||||||
{
|
{
|
||||||
struct nv50_screen *screen = nv50_screen(pscreen);
|
struct nv50_screen *screen = nv50_screen(pscreen);
|
||||||
|
@@ -395,6 +395,7 @@ nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
|
|||||||
|
|
||||||
static int
|
static int
|
||||||
nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
|
nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
|
||||||
|
enum pipe_shader_ir ir_type,
|
||||||
enum pipe_compute_cap param, void *data)
|
enum pipe_compute_cap param, void *data)
|
||||||
{
|
{
|
||||||
struct nvc0_screen *screen = nvc0_screen(pscreen);
|
struct nvc0_screen *screen = nvc0_screen(pscreen);
|
||||||
|
@@ -499,7 +499,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
|
|||||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
|
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
|
||||||
if (shader == PIPE_SHADER_COMPUTE) {
|
if (shader == PIPE_SHADER_COMPUTE) {
|
||||||
uint64_t max_const_buffer_size;
|
uint64_t max_const_buffer_size;
|
||||||
pscreen->get_compute_param(pscreen,
|
pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
|
||||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
||||||
&max_const_buffer_size);
|
&max_const_buffer_size);
|
||||||
return max_const_buffer_size;
|
return max_const_buffer_size;
|
||||||
|
@@ -612,6 +612,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int r600_get_compute_param(struct pipe_screen *screen,
|
static int r600_get_compute_param(struct pipe_screen *screen,
|
||||||
|
enum pipe_shader_ir ir_type,
|
||||||
enum pipe_compute_cap param,
|
enum pipe_compute_cap param,
|
||||||
void *ret)
|
void *ret)
|
||||||
{
|
{
|
||||||
@@ -678,7 +679,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
|||||||
uint64_t *max_global_size = ret;
|
uint64_t *max_global_size = ret;
|
||||||
uint64_t max_mem_alloc_size;
|
uint64_t max_mem_alloc_size;
|
||||||
|
|
||||||
r600_get_compute_param(screen,
|
r600_get_compute_param(screen, ir_type,
|
||||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
||||||
&max_mem_alloc_size);
|
&max_mem_alloc_size);
|
||||||
|
|
||||||
|
@@ -467,7 +467,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
|
|||||||
|
|
||||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
|
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
|
||||||
uint64_t max_const_buffer_size;
|
uint64_t max_const_buffer_size;
|
||||||
pscreen->get_compute_param(pscreen,
|
pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
|
||||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
||||||
&max_const_buffer_size);
|
&max_const_buffer_size);
|
||||||
return max_const_buffer_size;
|
return max_const_buffer_size;
|
||||||
|
@@ -175,6 +175,7 @@ trace_screen_get_paramf(struct pipe_screen *_screen,
|
|||||||
|
|
||||||
static int
|
static int
|
||||||
trace_screen_get_compute_param(struct pipe_screen *_screen,
|
trace_screen_get_compute_param(struct pipe_screen *_screen,
|
||||||
|
enum pipe_shader_ir ir_type,
|
||||||
enum pipe_compute_cap param, void *data)
|
enum pipe_compute_cap param, void *data)
|
||||||
{
|
{
|
||||||
struct trace_screen *tr_scr = trace_screen(_screen);
|
struct trace_screen *tr_scr = trace_screen(_screen);
|
||||||
@@ -184,10 +185,11 @@ trace_screen_get_compute_param(struct pipe_screen *_screen,
|
|||||||
trace_dump_call_begin("pipe_screen", "get_compute_param");
|
trace_dump_call_begin("pipe_screen", "get_compute_param");
|
||||||
|
|
||||||
trace_dump_arg(ptr, screen);
|
trace_dump_arg(ptr, screen);
|
||||||
|
trace_dump_arg(int, ir_type);
|
||||||
trace_dump_arg(int, param);
|
trace_dump_arg(int, param);
|
||||||
trace_dump_arg(ptr, data);
|
trace_dump_arg(ptr, data);
|
||||||
|
|
||||||
result = screen->get_compute_param(screen, param, data);
|
result = screen->get_compute_param(screen, ir_type, param, data);
|
||||||
|
|
||||||
trace_dump_ret(int, result);
|
trace_dump_ret(int, result);
|
||||||
|
|
||||||
|
@@ -109,6 +109,8 @@ struct pipe_screen {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Query a compute-specific capability/parameter/limit.
|
* Query a compute-specific capability/parameter/limit.
|
||||||
|
* \param ir_type shader IR type to which the param applies; may be any value
|
||||||
|
* if the param is not shader related
|
||||||
* \param param one of PIPE_COMPUTE_CAP_x
|
* \param param one of PIPE_COMPUTE_CAP_x
|
||||||
* \param ret pointer to a preallocated buffer that will be
|
* \param ret pointer to a preallocated buffer that will be
|
||||||
* initialized to the parameter value, or NULL.
|
* initialized to the parameter value, or NULL.
|
||||||
@@ -116,6 +118,7 @@ struct pipe_screen {
|
|||||||
* returned.
|
* returned.
|
||||||
*/
|
*/
|
||||||
int (*get_compute_param)(struct pipe_screen *,
|
int (*get_compute_param)(struct pipe_screen *,
|
||||||
|
enum pipe_shader_ir ir_type,
|
||||||
enum pipe_compute_cap param,
|
enum pipe_compute_cap param,
|
||||||
void *ret);
|
void *ret);
|
||||||
|
|
||||||
|
@@ -30,11 +30,12 @@ using namespace clover;
|
|||||||
namespace {
|
namespace {
|
||||||
template<typename T>
|
template<typename T>
|
||||||
std::vector<T>
|
std::vector<T>
|
||||||
get_compute_param(pipe_screen *pipe, pipe_compute_cap cap) {
|
get_compute_param(pipe_screen *pipe, pipe_shader_ir ir_format,
|
||||||
int sz = pipe->get_compute_param(pipe, cap, NULL);
|
pipe_compute_cap cap) {
|
||||||
|
int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL);
|
||||||
std::vector<T> v(sz / sizeof(T));
|
std::vector<T> v(sz / sizeof(T));
|
||||||
|
|
||||||
pipe->get_compute_param(pipe, cap, &v.front());
|
pipe->get_compute_param(pipe, ir_format, cap, &v.front());
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -115,19 +116,19 @@ device::max_samplers() const {
|
|||||||
|
|
||||||
cl_ulong
|
cl_ulong
|
||||||
device::max_mem_global() const {
|
device::max_mem_global() const {
|
||||||
return get_compute_param<uint64_t>(pipe,
|
return get_compute_param<uint64_t>(pipe, ir_format(),
|
||||||
PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
|
PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_ulong
|
cl_ulong
|
||||||
device::max_mem_local() const {
|
device::max_mem_local() const {
|
||||||
return get_compute_param<uint64_t>(pipe,
|
return get_compute_param<uint64_t>(pipe, ir_format(),
|
||||||
PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
|
PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_ulong
|
cl_ulong
|
||||||
device::max_mem_input() const {
|
device::max_mem_input() const {
|
||||||
return get_compute_param<uint64_t>(pipe,
|
return get_compute_param<uint64_t>(pipe, ir_format(),
|
||||||
PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
|
PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -146,30 +147,30 @@ device::max_const_buffers() const {
|
|||||||
size_t
|
size_t
|
||||||
device::max_threads_per_block() const {
|
device::max_threads_per_block() const {
|
||||||
return get_compute_param<uint64_t>(
|
return get_compute_param<uint64_t>(
|
||||||
pipe, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
|
pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_ulong
|
cl_ulong
|
||||||
device::max_mem_alloc_size() const {
|
device::max_mem_alloc_size() const {
|
||||||
return get_compute_param<uint64_t>(pipe,
|
return get_compute_param<uint64_t>(pipe, ir_format(),
|
||||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];
|
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_uint
|
cl_uint
|
||||||
device::max_clock_frequency() const {
|
device::max_clock_frequency() const {
|
||||||
return get_compute_param<uint32_t>(pipe,
|
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||||
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
|
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_uint
|
cl_uint
|
||||||
device::max_compute_units() const {
|
device::max_compute_units() const {
|
||||||
return get_compute_param<uint32_t>(pipe,
|
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||||
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
|
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
device::image_support() const {
|
device::image_support() const {
|
||||||
return get_compute_param<uint32_t>(pipe,
|
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||||
PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
|
PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -181,13 +182,15 @@ device::has_doubles() const {
|
|||||||
|
|
||||||
std::vector<size_t>
|
std::vector<size_t>
|
||||||
device::max_block_size() const {
|
device::max_block_size() const {
|
||||||
auto v = get_compute_param<uint64_t>(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
|
auto v = get_compute_param<uint64_t>(pipe, ir_format(),
|
||||||
|
PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
|
||||||
return { v.begin(), v.end() };
|
return { v.begin(), v.end() };
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_uint
|
cl_uint
|
||||||
device::subgroup_size() const {
|
device::subgroup_size() const {
|
||||||
return get_compute_param<uint32_t>(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
|
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||||
|
PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string
|
std::string
|
||||||
@@ -209,7 +212,7 @@ device::ir_format() const {
|
|||||||
std::string
|
std::string
|
||||||
device::ir_target() const {
|
device::ir_target() const {
|
||||||
std::vector<char> target = get_compute_param<char>(
|
std::vector<char> target = get_compute_param<char>(
|
||||||
pipe, PIPE_COMPUTE_CAP_IR_TARGET);
|
pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET);
|
||||||
return { target.data() };
|
return { target.data() };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -58,7 +58,9 @@ struct context {
|
|||||||
uint64_t __v[4]; \
|
uint64_t __v[4]; \
|
||||||
int __i, __n; \
|
int __i, __n; \
|
||||||
\
|
\
|
||||||
__n = ctx->screen->get_compute_param(ctx->screen, c, __v); \
|
__n = ctx->screen->get_compute_param(ctx->screen, \
|
||||||
|
PIPE_SHADER_IR_TGSI, \
|
||||||
|
c, __v); \
|
||||||
printf("%s: {", #c); \
|
printf("%s: {", #c); \
|
||||||
\
|
\
|
||||||
for (__i = 0; __i < __n / sizeof(*__v); ++__i) \
|
for (__i = 0; __i < __n / sizeof(*__v); ++__i) \
|
||||||
|
@@ -1124,14 +1124,15 @@ void st_init_extensions(struct pipe_screen *screen,
|
|||||||
if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
|
if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
|
||||||
uint64_t grid_size[3], block_size[3];
|
uint64_t grid_size[3], block_size[3];
|
||||||
|
|
||||||
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE,
|
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
|
||||||
grid_size);
|
PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid_size);
|
||||||
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,
|
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
|
||||||
block_size);
|
PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, block_size);
|
||||||
screen->get_compute_param(screen,
|
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
|
||||||
PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
|
PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
|
||||||
&consts->MaxComputeWorkGroupInvocations);
|
&consts->MaxComputeWorkGroupInvocations);
|
||||||
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
|
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
|
||||||
|
PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
|
||||||
&consts->MaxComputeSharedMemorySize);
|
&consts->MaxComputeSharedMemorySize);
|
||||||
|
|
||||||
for (i = 0; i < 3; i++) {
|
for (i = 0; i < 3; i++) {
|
||||||
|
Reference in New Issue
Block a user