gallium: change PIPE_COMPUTE_CAP_SUBGROUP_SIZE to a bitfield of sizes

This will be required for `cl_intel_required_subgroup_size`, but it
already helps with implementing OpenCL subgroups, as it allows us to check
against every supported subgroup size when implementing
`CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT`.

Signed-off-by: Karol Herbst <git@karolherbst.de>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22893>
This commit is contained in:
Karol Herbst
2023-05-07 03:16:31 +02:00
committed by Marge Bot
parent 13df23bd67
commit a8df5cfa3a
17 changed files with 30 additions and 21 deletions

View File

@@ -804,8 +804,9 @@ pipe_screen::get_compute_param.
are supported.
* ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported.
Non-zero means yes, zero means no. Value type: ``uint32_t``
* ``PIPE_COMPUTE_CAP_SUBGROUP_SIZE``: The size of a basic execution unit in
threads. Also known as wavefront size, warp size or SIMD width.
* ``PIPE_COMPUTE_CAP_SUBGROUP_SIZES``: Bitwise OR of the supported power-of-two
sizes of a basic execution unit in threads. Also known as wavefront size, warp
size or SIMD width. E.g. ``64 | 32``.
* ``PIPE_COMPUTE_CAP_ADDRESS_BITS``: The default compute device address space
size specified as an unsigned integer value in bits.
* ``PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK``: Maximum variable number

View File

@@ -1855,7 +1855,7 @@ agx_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
RET((uint32_t[]){1});
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
RET((uint32_t[]){32});
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:

View File

@@ -589,7 +589,7 @@ crocus_get_compute_param(struct pipe_screen *pscreen,
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
RET((uint32_t []) { 1 });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
RET((uint32_t []) { BRW_SUBGROUP_SIZE });
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:

View File

@@ -843,7 +843,7 @@ fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
RET((uint32_t[]){1});
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
RET((uint32_t[]){32}); // TODO
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:

View File

@@ -617,8 +617,8 @@ iris_get_compute_param(struct pipe_screen *pscreen,
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
RET((uint32_t []) { 1 });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
RET((uint32_t []) { BRW_SUBGROUP_SIZE });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
RET((uint32_t []) { 32 | 16 | 8 });
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
RET((uint32_t []) { devinfo->max_cs_workgroup_threads });

View File

@@ -531,7 +531,7 @@ llvmpipe_get_compute_param(struct pipe_screen *_screen,
return sizeof(uint32_t);
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
return 0;
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
if (ret) {
uint32_t *subgroup_size = ret;
*subgroup_size = lp_native_vector_width / 32;

View File

@@ -449,7 +449,7 @@ nv50_screen_get_compute_param(struct pipe_screen *pscreen,
RET((uint64_t []) { 16 << 10 });
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
RET((uint64_t []) { 4096 });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
RET((uint32_t []) { 32 });
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
RET((uint32_t []) { 0 });

View File

@@ -556,7 +556,7 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
RET((uint64_t []) { 512 << 10 });
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
RET((uint64_t []) { 4096 });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
RET((uint32_t []) { 32 });
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
RET((uint32_t []) { 0 });

View File

@@ -751,7 +751,7 @@ panfrost_get_compute_param(struct pipe_screen *pscreen,
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
RET((uint32_t[]){1});
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
RET((uint32_t[]){pan_subgroup_size(dev->arch)});
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:

View File

@@ -1035,7 +1035,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
break; /* unused */
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
if (ret) {
uint32_t *subgroup_size = ret;
*subgroup_size = r600_wavefront_size(rscreen->family);

View File

@@ -1117,10 +1117,15 @@ static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir
}
return sizeof(uint32_t);
}
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
if (ret) {
uint32_t *subgroup_size = ret;
*subgroup_size = si_determine_wave_size(sscreen, NULL);
if (sscreen->debug_flags & DBG(W32_CS))
*subgroup_size = 32;
else if (sscreen->debug_flags & DBG(W64_CS))
*subgroup_size = 64;
else
*subgroup_size = sscreen->info.gfx_level < GFX10 ? 64 : 64 | 32;
}
return sizeof(uint32_t);
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:

View File

@@ -556,7 +556,7 @@ softpipe_get_compute_param(struct pipe_screen *_screen,
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:

View File

@@ -545,7 +545,7 @@ v3d_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
RET((uint32_t []) { 1 });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
RET((uint32_t []) { 16 });
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:

View File

@@ -437,7 +437,7 @@ zink_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
RET((uint32_t []) { 1 });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
RET((uint32_t []) { screen->info.props11.subgroupSize });
case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:

View File

@@ -432,8 +432,11 @@ device::max_block_size() const {
cl_uint
device::subgroup_size() const {
return get_compute_param<uint32_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
cl_uint subgroup_sizes =
get_compute_param<uint32_t>(pipe, ir_format(), PIPE_COMPUTE_CAP_SUBGROUP_SIZES)[0];
if (!subgroup_sizes)
return 0;
return 1 << (util_last_bit(subgroup_sizes) - 1);
}
cl_uint

View File

@@ -849,7 +849,7 @@ impl Device {
pub fn subgroup_sizes(&self) -> Vec<usize> {
let subgroup_size = ComputeParam::<u32>::compute_param(
self.screen.as_ref(),
pipe_compute_cap::PIPE_COMPUTE_CAP_SUBGROUP_SIZE,
pipe_compute_cap::PIPE_COMPUTE_CAP_SUBGROUP_SIZES,
);
SetBitIndices::from_msb(subgroup_size)

View File

@@ -1141,7 +1141,7 @@ enum pipe_compute_cap
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS,
PIPE_COMPUTE_CAP_MAX_SUBGROUPS,
PIPE_COMPUTE_CAP_IMAGES_SUPPORTED,
PIPE_COMPUTE_CAP_SUBGROUP_SIZE,
PIPE_COMPUTE_CAP_SUBGROUP_SIZES,
PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK,
};