gallium: add PIPE_COMPUTE_CAP_SUBGROUP_SIZE
We need this to implement OpenCL's CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
@@ -384,6 +384,8 @@ pipe_screen::get_compute_param.
|
||||
Value type: ``uint32_t``
|
||||
* ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported
|
||||
non-zero means yes, zero means no. Value type: ``uint32_t``
|
||||
* ``PIPE_COMPUTE_CAP_SUBGROUP_SIZE``: The size of a basic execution unit in
|
||||
threads. Also known as wavefront size, warp size or SIMD width. Value type: ``uint32_t``
|
||||
|
||||
.. _pipe_bind:
|
||||
|
||||
|
@@ -193,6 +193,7 @@ ilo_get_compute_param(struct pipe_screen *screen,
|
||||
uint32_t max_clock_frequency;
|
||||
uint32_t max_compute_units;
|
||||
uint32_t images_supported;
|
||||
uint32_t subgroup_size;
|
||||
} val;
|
||||
const void *ptr;
|
||||
int size;
|
||||
@@ -284,6 +285,13 @@ ilo_get_compute_param(struct pipe_screen *screen,
|
||||
ptr = &val.images_supported;
|
||||
size = sizeof(val.images_supported);
|
||||
break;
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
/* best case is actually SIMD32 */
|
||||
val.subgroup_size = 16;
|
||||
|
||||
ptr = &val.subgroup_size;
|
||||
size = sizeof(val.subgroup_size);
|
||||
break;
|
||||
default:
|
||||
ptr = NULL;
|
||||
size = 0;
|
||||
|
@@ -341,6 +341,7 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
|
||||
enum pipe_compute_cap param, void *data)
|
||||
{
|
||||
uint64_t *data64 = (uint64_t *)data;
|
||||
uint32_t *data32 = (uint32_t *)data;
|
||||
const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
|
||||
|
||||
switch (param) {
|
||||
@@ -372,6 +373,9 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
|
||||
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
|
||||
data64[0] = 4096;
|
||||
return 8;
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
data32[0] = 32;
|
||||
return 4;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
@@ -636,6 +636,12 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
return sizeof(uint32_t);
|
||||
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
|
||||
break; /* unused */
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
if (ret) {
|
||||
uint32_t *subgroup_size = ret;
|
||||
*subgroup_size = r600_wavefront_size(rscreen->family);
|
||||
}
|
||||
return sizeof(uint32_t);
|
||||
}
|
||||
|
||||
fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
|
||||
|
@@ -570,6 +570,26 @@ static inline unsigned r600_tex_aniso_filter(unsigned filter)
|
||||
/* else */ return 4;
|
||||
}
|
||||
|
||||
static inline unsigned r600_wavefront_size(enum radeon_family family)
|
||||
{
|
||||
switch (family) {
|
||||
case CHIP_RV610:
|
||||
case CHIP_RS780:
|
||||
case CHIP_RV620:
|
||||
case CHIP_RS880:
|
||||
return 16;
|
||||
case CHIP_RV630:
|
||||
case CHIP_RV635:
|
||||
case CHIP_RV730:
|
||||
case CHIP_RV710:
|
||||
case CHIP_PALM:
|
||||
case CHIP_CEDAR:
|
||||
return 32;
|
||||
default:
|
||||
return 64;
|
||||
}
|
||||
}
|
||||
|
||||
#define COMPUTE_DBG(rscreen, fmt, args...) \
|
||||
do { \
|
||||
if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
|
||||
|
@@ -700,7 +700,8 @@ enum pipe_compute_cap
|
||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
||||
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY,
|
||||
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS,
|
||||
PIPE_COMPUTE_CAP_IMAGES_SUPPORTED
|
||||
PIPE_COMPUTE_CAP_IMAGES_SUPPORTED,
|
||||
PIPE_COMPUTE_CAP_SUBGROUP_SIZE
|
||||
};
|
||||
|
||||
/**
|
||||
|
Reference in New Issue
Block a user