gallium: change PIPE_COMPUTE_CAP_SUBGROUP_SIZE to a bitfield of sizes
This will be required for `cl_intel_required_subgroup_size`, but it already helps with implementing OpenCL subgroups, as it allows us to check every supported subgroup size when implementing `CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT`. Signed-off-by: Karol Herbst <git@karolherbst.de> Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22893>
This commit is contained in:
@@ -804,8 +804,9 @@ pipe_screen::get_compute_param.
|
||||
are supported.
|
||||
* ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported
|
||||
non-zero means yes, zero means no. Value type: ``uint32_t``
|
||||
* ``PIPE_COMPUTE_CAP_SUBGROUP_SIZE``: The size of a basic execution unit in
|
||||
threads. Also known as wavefront size, warp size or SIMD width.
|
||||
* ``PIPE_COMPUTE_CAP_SUBGROUP_SIZES``: Bitwise OR of the supported power-of-two sizes of a basic execution
|
||||
unit in threads. Also known as wavefront size, warp size or SIMD width.
|
||||
E.g. ``64 | 32``.
|
||||
* ``PIPE_COMPUTE_CAP_ADDRESS_BITS``: The default compute device address space
|
||||
size specified as an unsigned integer value in bits.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK``: Maximum variable number
|
||||
|
@@ -1855,7 +1855,7 @@ agx_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
|
||||
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
|
||||
RET((uint32_t[]){1});
|
||||
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
RET((uint32_t[]){32});
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
|
||||
|
@@ -589,7 +589,7 @@ crocus_get_compute_param(struct pipe_screen *pscreen,
|
||||
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
|
||||
RET((uint32_t []) { 1 });
|
||||
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
RET((uint32_t []) { BRW_SUBGROUP_SIZE });
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
|
||||
|
@@ -843,7 +843,7 @@ fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
|
||||
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
|
||||
RET((uint32_t[]){1});
|
||||
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
RET((uint32_t[]){32}); // TODO
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
|
||||
|
@@ -617,8 +617,8 @@ iris_get_compute_param(struct pipe_screen *pscreen,
|
||||
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
|
||||
RET((uint32_t []) { 1 });
|
||||
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
RET((uint32_t []) { BRW_SUBGROUP_SIZE });
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
RET((uint32_t []) { 32 | 16 | 8 });
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
|
||||
RET((uint32_t []) { devinfo->max_cs_workgroup_threads });
|
||||
|
@@ -531,7 +531,7 @@ llvmpipe_get_compute_param(struct pipe_screen *_screen,
|
||||
return sizeof(uint32_t);
|
||||
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
|
||||
return 0;
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
if (ret) {
|
||||
uint32_t *subgroup_size = ret;
|
||||
*subgroup_size = lp_native_vector_width / 32;
|
||||
|
@@ -449,7 +449,7 @@ nv50_screen_get_compute_param(struct pipe_screen *pscreen,
|
||||
RET((uint64_t []) { 16 << 10 });
|
||||
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
|
||||
RET((uint64_t []) { 4096 });
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
RET((uint32_t []) { 32 });
|
||||
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
|
||||
RET((uint32_t []) { 0 });
|
||||
|
@@ -556,7 +556,7 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
|
||||
RET((uint64_t []) { 512 << 10 });
|
||||
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
|
||||
RET((uint64_t []) { 4096 });
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
RET((uint32_t []) { 32 });
|
||||
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
|
||||
RET((uint32_t []) { 0 });
|
||||
|
@@ -751,7 +751,7 @@ panfrost_get_compute_param(struct pipe_screen *pscreen,
|
||||
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
|
||||
RET((uint32_t[]){1});
|
||||
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
RET((uint32_t[]){pan_subgroup_size(dev->arch)});
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
|
||||
|
@@ -1035,7 +1035,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
|
||||
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
|
||||
break; /* unused */
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
if (ret) {
|
||||
uint32_t *subgroup_size = ret;
|
||||
*subgroup_size = r600_wavefront_size(rscreen->family);
|
||||
|
@@ -1117,10 +1117,15 @@ static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir
|
||||
}
|
||||
return sizeof(uint32_t);
|
||||
}
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
if (ret) {
|
||||
uint32_t *subgroup_size = ret;
|
||||
*subgroup_size = si_determine_wave_size(sscreen, NULL);
|
||||
if (sscreen->debug_flags & DBG(W32_CS))
|
||||
*subgroup_size = 32;
|
||||
else if (sscreen->debug_flags & DBG(W64_CS))
|
||||
*subgroup_size = 64;
|
||||
else
|
||||
*subgroup_size = sscreen->info.gfx_level < GFX10 ? 64 : 64 | 32;
|
||||
}
|
||||
return sizeof(uint32_t);
|
||||
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
|
||||
|
@@ -556,7 +556,7 @@ softpipe_get_compute_param(struct pipe_screen *_screen,
|
||||
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
|
||||
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
|
||||
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
|
||||
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
|
||||
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
|
||||
|
@@ -545,7 +545,7 @@ v3d_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
|
||||
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
|
||||
RET((uint32_t []) { 1 });
|
||||
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
RET((uint32_t []) { 16 });
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
|
||||
|
@@ -437,7 +437,7 @@ zink_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
|
||||
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
|
||||
RET((uint32_t []) { 1 });
|
||||
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
|
||||
RET((uint32_t []) { screen->info.props11.subgroupSize });
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
|
||||
|
@@ -432,8 +432,11 @@ device::max_block_size() const {
|
||||
|
||||
cl_uint
|
||||
device::subgroup_size() const {
|
||||
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
|
||||
cl_uint subgroup_sizes =
|
||||
get_compute_param<uint32_t>(pipe, ir_format(), PIPE_COMPUTE_CAP_SUBGROUP_SIZES)[0];
|
||||
if (!subgroup_sizes)
|
||||
return 0;
|
||||
return 1 << (util_last_bit(subgroup_sizes) - 1);
|
||||
}
|
||||
|
||||
cl_uint
|
||||
|
@@ -849,7 +849,7 @@ impl Device {
|
||||
pub fn subgroup_sizes(&self) -> Vec<usize> {
|
||||
let subgroup_size = ComputeParam::<u32>::compute_param(
|
||||
self.screen.as_ref(),
|
||||
pipe_compute_cap::PIPE_COMPUTE_CAP_SUBGROUP_SIZE,
|
||||
pipe_compute_cap::PIPE_COMPUTE_CAP_SUBGROUP_SIZES,
|
||||
);
|
||||
|
||||
SetBitIndices::from_msb(subgroup_size)
|
||||
|
@@ -1141,7 +1141,7 @@ enum pipe_compute_cap
|
||||
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS,
|
||||
PIPE_COMPUTE_CAP_MAX_SUBGROUPS,
|
||||
PIPE_COMPUTE_CAP_IMAGES_SUPPORTED,
|
||||
PIPE_COMPUTE_CAP_SUBGROUP_SIZE,
|
||||
PIPE_COMPUTE_CAP_SUBGROUP_SIZES,
|
||||
PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK,
|
||||
};
|
||||
|
||||
|
Reference in New Issue
Block a user